Example #1
 def _set_units(self):
     self.unit_registry = UnitRegistry()
     self.time_unit = self.quan(1.0, "s")
     if self.cosmological_simulation:
         # Instantiate Cosmology object for units and time conversions.
         self.cosmology = \
           Cosmology(hubble_constant=self.hubble_constant,
                     omega_matter=self.omega_matter,
                     omega_lambda=self.omega_lambda,
                     unit_registry=self.unit_registry)
         self.unit_registry.modify("h", self.hubble_constant)
         # Comoving lengths
         for my_unit in ["m", "pc", "AU", "au"]:
             new_unit = "%scm" % my_unit
             # technically not true, but should be ok
             self.unit_registry.add(new_unit,
                                    self.unit_registry.lut[my_unit][0],
                                    dimensions.length,
                                    "\\rm{%s}/(1+z)" % my_unit)
         self.length_unit = self.quan(self.unit_base["UnitLength_in_cm"],
                                      "cmcm / h",
                                      registry=self.unit_registry)
         self.box_size *= self.length_unit.in_units("Mpccm / h")
     else:
         # Read time from file for non-cosmological sim
         self.time_unit = self.quan(
             self.unit_base["UnitLength_in_cm"]/ \
                 self.unit_base["UnitVelocity_in_cm_per_s"], "s")
         self.unit_registry.add("code_time", 1.0, dimensions.time)
         self.unit_registry.modify("code_time", self.time_unit)
         # Length
         self.length_unit = self.quan(self.unit_base["UnitLength_in_cm"],
                                      "cm")
         self.unit_registry.add("code_length", 1.0, dimensions.length)
         self.unit_registry.modify("code_length", self.length_unit)
    def _set_units(self):
        self.unit_registry = UnitRegistry()
        self.unit_registry.lut["code_time"] = (1.0, dimensions.time)
        if self.cosmological_simulation:
            # Instantiate EnzoCosmology object for units and time conversions.
            self.cosmology = \
              EnzoCosmology(self.parameters['CosmologyHubbleConstantNow'],
                            self.parameters['CosmologyOmegaMatterNow'],
                            self.parameters['CosmologyOmegaLambdaNow'],
                            0.0, self.parameters['CosmologyInitialRedshift'],
                            unit_registry=self.unit_registry)

            self.time_unit = self.cosmology.time_unit.in_units("s")
            self.unit_registry.modify("h", self.hubble_constant)
            # Comoving lengths
            for my_unit in ["m", "pc", "AU", "au"]:
                new_unit = "%scm" % my_unit
                # technically not true, but should be ok
                self.unit_registry.add(new_unit,
                                       self.unit_registry.lut[my_unit][0],
                                       dimensions.length,
                                       "\\rm{%s}/(1+z)" % my_unit)
            self.length_unit = self.quan(self.box_size,
                                         "Mpccm / h",
                                         registry=self.unit_registry)
            self.box_size = self.length_unit
        else:
            self.time_unit = self.quan(self.parameters["TimeUnits"], "s")
        self.unit_registry.modify("code_time", self.time_unit)
Example #3
 def unit_registry(self):
     """
     Unit system registry.
     """
     if self._unit_registry is None:
         self._unit_registry = UnitRegistry()
     return self._unit_registry
Example #4
 def fget(self):
     ur = self._unit_registry
     if ur is None:
         ur = UnitRegistry()
         # This will be updated when we add a volume source
         ur.add("unitary", 1.0, length)
     self._unit_registry = ur
     return self._unit_registry
Example #5
 def _create_unit_registry(self):
     self.unit_registry = UnitRegistry()
     import yt.units.dimensions as dimensions
     self.unit_registry.add("code_length", 1.0, dimensions.length)
     self.unit_registry.add("code_mass", 1.0, dimensions.mass)
     self.unit_registry.add("code_time", 1.0, dimensions.time)
     self.unit_registry.add("code_magnetic", 1.0, dimensions.magnetic_field)
     self.unit_registry.add("code_temperature", 1.0, dimensions.temperature)
     self.unit_registry.add("code_velocity", 1.0, dimensions.velocity)
     self.unit_registry.add("code_metallicity", 1.0,
                            dimensions.dimensionless)
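The helper above registers each code unit with a placeholder value of 1.0; the real sizes are filled in later with modify(), as Example #1 does. A minimal standalone sketch of that two-step pattern (the 1 Mpc value below is purely illustrative):

from yt.units.unit_registry import UnitRegistry
from yt.units.yt_array import YTQuantity
import yt.units.dimensions as dimensions

reg = UnitRegistry()
# Step 1: register the symbol with a placeholder base value.
reg.add("code_length", 1.0, dimensions.length)
# Step 2: rescale once the dataset's unit size is known (here, 1 Mpc in cm).
reg.modify("code_length", 3.0857e24)
width = YTQuantity(2.0, "code_length", registry=reg)
print(width.in_units("Mpc"))  # ~2.0 Mpc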
Example #6
    def __init__(self, filename):
        """
        Initialize an Arbor given an input file.
        """

        self.filename = filename
        self.basename = os.path.basename(filename)
        self.unit_registry = UnitRegistry()
        self._parse_parameter_file()
        self._set_units()
        self._root_field_data = FieldContainer(self)
        self._setup_fields()
        self._set_default_selector()
        self._node_io = self._tree_field_io_class(self)
        self._root_io = self._root_field_io_class(self)
 def _set_units(self):
     self.unit_registry = UnitRegistry()
     self.time_unit = self.quan(1.0, "s")
     if self.cosmological_simulation:
         # Instantiate Cosmology object for units and time conversions.
         self.cosmology = \
           Cosmology(hubble_constant=self.hubble_constant,
                     omega_matter=self.omega_matter,
                     omega_lambda=self.omega_lambda,
                     unit_registry=self.unit_registry)
         self.unit_registry.modify("h", self.hubble_constant)
         # Comoving lengths
         for my_unit in ["m", "pc", "AU", "au"]:
             new_unit = "%scm" % my_unit
             # technically not true, but should be ok
             self.unit_registry.add(
                 new_unit, self.unit_registry.lut[my_unit][0],
                 dimensions.length, "\\rm{%s}/(1+z)" % my_unit)
         self.length_unit = self.quan(self.unit_base["UnitLength_in_cm"],
                                      "cmcm / h", registry=self.unit_registry)
         self.box_size *= self.length_unit.in_units("Mpccm / h")
     else:
         # Read time from file for non-cosmological sim
         self.time_unit = self.quan(
             self.unit_base["UnitLength_in_cm"]/ \
                 self.unit_base["UnitVelocity_in_cm_per_s"], "s")
         self.unit_registry.add("code_time", 1.0, dimensions.time)
         self.unit_registry.modify("code_time", self.time_unit)
         # Length
         self.length_unit = self.quan(
             self.unit_base["UnitLength_in_cm"],"cm")
         self.unit_registry.add("code_length", 1.0, dimensions.length)
         self.unit_registry.modify("code_length", self.length_unit)
Example #8
 def _parse_parameter_file(self):
     fh = h5py.File(self.filename, "r")
     for attr in ["hubble_constant", "omega_matter", "omega_lambda"]:
         setattr(self, attr, fh.attrs[attr])
     if "unit_registry_json" in fh.attrs:
         self.unit_registry = \
           UnitRegistry.from_json(
               fh.attrs["unit_registry_json"].astype(str))
     self.unit_registry.modify("h", self.hubble_constant)
     self.box_size = _hdf5_yt_attr(fh,
                                   "box_size",
                                   unit_registry=self.unit_registry)
     field_list = []
     fi = {}
     for field in fh["data"]:
         d = fh["data"][field]
         units = _hdf5_yt_attr(d, "units")
         if isinstance(units, bytes):
             units = units.decode("utf")
         if len(d.shape) > 1:
             for ax in "xyz":
                 my_field = "%s_%s" % (field, ax)
                 field_list.append(my_field)
                 fi[my_field] = {"vector": True, "units": units}
         else:
             field_list.append(field)
             fi[field] = {"units": units}
     fh.close()
     self.field_list = field_list
     self.field_info.update(fi)
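Example #8 restores a registry that was stored as a JSON string in an HDF5 attribute. A small round-trip sketch of that serialization, assuming UnitRegistry also exposes the matching to_json() method:

from yt.units.unit_registry import UnitRegistry
import yt.units.dimensions as dimensions

reg = UnitRegistry()
reg.add("code_length", 1.0, dimensions.length)
payload = reg.to_json()                    # JSON string, e.g. written to fh.attrs
restored = UnitRegistry.from_json(payload)
assert "code_length" in restored.lut       # custom symbols survive the round trip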
class FakeDS:
    domain_left_edge = None
    domain_right_edge = None
    domain_width = None
    unit_registry = UnitRegistry()
    unit_registry.add('code_length', 1.0, dimensions.length)
    periodicity = (False, False, False)
 def mpi_bcast(self, data, root=0):
     # The second check below makes sure that we know how to communicate
     # this type of array. Otherwise, we'll pickle it.
     if isinstance(data, np.ndarray) and \
             get_mpi_type(data.dtype) is not None:
         if self.comm.rank == root:
             if isinstance(data, YTArray):
                 info = (data.shape, data.dtype, str(data.units),
                         data.units.registry.lut)
             else:
                 info = (data.shape, data.dtype)
         else:
             info = ()
         info = self.comm.bcast(info, root=root)
         if self.comm.rank != root:
             if len(info) == 4:
                 registry = UnitRegistry(lut=info[3],
                                         add_default_symbols=False)
                 data = YTArray(np.empty(info[0], dtype=info[1]),
                                info[2],
                                registry=registry)
             else:
                 data = np.empty(info[0], dtype=info[1])
         mpi_type = get_mpi_type(info[1])
         self.comm.Bcast([data, mpi_type], root=root)
         return data
     else:
         # Use pickled methods.
         data = self.comm.bcast(data, root=root)
         return data
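On non-root ranks, mpi_bcast rebuilds an empty YTArray from the broadcast metadata before receiving the raw buffer. The same reconstruction step, stripped of MPI for clarity (a standalone sketch using only the YTArray/UnitRegistry calls seen above):

import numpy as np
from yt.units.yt_array import YTArray
from yt.units.unit_registry import UnitRegistry

src = YTArray(np.zeros((4, 3)), "g/cm**3")
# Metadata the root rank would broadcast: shape, dtype, unit string, registry LUT.
info = (src.shape, src.dtype, str(src.units), src.units.registry.lut)
registry = UnitRegistry(lut=info[3], add_default_symbols=False)
dest = YTArray(np.empty(info[0], dtype=info[1]), info[2], registry=registry)
assert dest.units == src.units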
    def _set_units(self):
        self.unit_registry = UnitRegistry()
        self.unit_registry.lut["code_time"] = (1.0, dimensions.time)
        if self.cosmological_simulation:
            # Instantiate EnzoCosmology object for units and time conversions.
            self.cosmology = \
              EnzoCosmology(self.parameters['CosmologyHubbleConstantNow'],
                            self.parameters['CosmologyOmegaMatterNow'],
                            self.parameters['CosmologyOmegaLambdaNow'],
                            0.0, self.parameters['CosmologyInitialRedshift'],
                            unit_registry=self.unit_registry)

            self.time_unit = self.cosmology.time_unit.in_units("s")
            self.unit_registry.modify("h", self.hubble_constant)
            # Comoving lengths
            for my_unit in ["m", "pc", "AU", "au"]:
                new_unit = "%scm" % my_unit
                # technically not true, but should be ok
                self.unit_registry.add(new_unit, self.unit_registry.lut[my_unit][0],
                                       dimensions.length, "\\rm{%s}/(1+z)" % my_unit)
            self.length_unit = self.quan(self.box_size, "Mpccm / h",
                                         registry=self.unit_registry)
            self.box_size = self.length_unit
        else:
            self.time_unit = self.quan(self.parameters["TimeUnits"], "s")
        self.unit_registry.modify("code_time", self.time_unit)
Example #12
    def from_hdf5(cls, filename, dataset_name=None):
        r"""Attempts read in and convert a dataset in an hdf5 file into a YTArray.

        Parameters
        ----------
        filename: string
            The filename of the hdf5 file.

        dataset_name: string
            The name of the dataset to read from.  If the dataset has a units
            attribute, attempt to infer units as well.

        """
        import h5py
        from yt.extern.six.moves import cPickle as pickle

        if dataset_name is None:
            dataset_name = 'array_data'

        f = h5py.File(filename, "r")
        dataset = f[dataset_name]
        data = dataset[:]
        units = dataset.attrs.get('units', '')
        if 'unit_registry' in dataset.attrs.keys():
            unit_lut = pickle.loads(dataset.attrs['unit_registry'].tostring())
        else:
            unit_lut = None
        f.close()
        registry = UnitRegistry(lut=unit_lut, add_default_symbols=False)
        return cls(data, units, registry=registry)
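A hypothetical round trip with from_hdf5, assuming the complementary YTArray.write_hdf5 writer method (the filename and dataset name below are illustrative):

import numpy as np
from yt.units.yt_array import YTArray

a = YTArray(np.arange(5), "keV")
a.write_hdf5("my_data.h5", dataset_name="array_data")   # assumed writer method
b = YTArray.from_hdf5("my_data.h5", dataset_name="array_data")
assert str(b.units) == "keV"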
Example #13
    def _parse_parameter_file(self):
        self.refine_by = 2
        with h5py.File(self.parameter_filename, mode="r") as f:
            for key in f.attrs.keys():
                v = parse_h5_attr(f, key)
                if key == "con_args":
                    try:
                        v = eval(v)
                    except ValueError:
                        # support older ytdata outputs
                        v = v.astype('str')
                self.parameters[key] = v
            self._with_parameter_file_open(f)

        # if saved, restore unit registry from the json string
        if "unit_registry_json" in self.parameters:
            self.unit_registry = UnitRegistry.from_json(
                self.parameters["unit_registry_json"])
            # reset self.arr and self.quan to use new unit_registry
            self._arr = None
            self._quan = None
            for dim in [
                    "length", "mass", "pressure", "temperature", "time",
                    "velocity"
            ]:
                cu = "code_" + dim
                if cu not in self.unit_registry:
                    self.unit_registry.add(cu, 1.0, getattr(dimensions, dim))
            if "code_magnetic" not in self.unit_registry:
                self.unit_registry.add("code_magnetic", 1.0,
                                       dimensions.magnetic_field)

        # if saved, set unit system
        if "unit_system_name" in self.parameters:
            unit_system = self.parameters["unit_system_name"]
            del self.parameters["unit_system_name"]
        else:
            unit_system = "cgs"
        # reset unit system since we may have a new unit registry
        self._assign_unit_system(unit_system)

        # assign units to parameters that have associated unit string
        del_pars = []
        for par in self.parameters:
            ustr = "%s_units" % par
            if ustr in self.parameters:
                if isinstance(self.parameters[par], np.ndarray):
                    to_u = self.arr
                else:
                    to_u = self.quan
                self.parameters[par] = to_u(self.parameters[par],
                                            self.parameters[ustr])
                del_pars.append(ustr)
        for par in del_pars:
            del self.parameters[par]

        for attr in self._con_attrs:
            setattr(self, attr, self.parameters.get(attr))
 def __init__(self, hubble_constant = 0.71,
              omega_matter = 0.27,
              omega_lambda = 0.73,
              omega_curvature = 0.0,
              unit_registry = None):
     self.omega_matter = omega_matter
     self.omega_lambda = omega_lambda
     self.omega_curvature = omega_curvature
     if unit_registry is None:
         unit_registry = UnitRegistry()
         unit_registry.modify("h", hubble_constant)
         for my_unit in ["m", "pc", "AU", "au"]:
             new_unit = "%scm" % my_unit
             # technically not true, but distances here are actually comoving
             unit_registry.add(new_unit, unit_registry.lut[my_unit][0],
                               dimensions.length, "\\rm{%s}/(1+z)" % my_unit)
     self.unit_registry = unit_registry
     self.hubble_constant = self.quan(hubble_constant, "100*km/s/Mpc")
Example #15
    def __setstate__(self, state):
        """Pickle setstate method

        This is called inside pickle.load() and restores the unit data from the
        metadata extracted in __reduce__ and then serialized by pickle.
        """
        super(YTArray, self).__setstate__(state[1:])
        unit, lut = state[0]
        registry = UnitRegistry(lut=lut, add_default_symbols=False)
        self.units = Unit(unit, registry=registry)
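Since __reduce__ stores the unit string plus the registry LUT and __setstate__ rebuilds a registry from them, a pickle round trip preserves units. A short sketch:

import pickle
import numpy as np
from yt.units.yt_array import YTArray

a = YTArray(np.ones(3), "Mpc/h")
b = pickle.loads(pickle.dumps(a))
assert str(b.units) == str(a.units)
assert np.array_equal(a, b)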
Example #16
 def __deepcopy__(self, memodict=None):
     if memodict is None:
         memodict = {}
     expr = str(self.expr)
     base_value = copy.deepcopy(self.base_value)
     base_offset = copy.deepcopy(self.base_offset)
     dimensions = copy.deepcopy(self.dimensions)
     lut = copy.deepcopy(self.registry.lut)
     registry = UnitRegistry(lut=lut)
     return Unit(expr, base_value, base_offset, dimensions, registry)
 def recv_array(self, source, tag=0):
     metadata = self.comm.recv(source=source, tag=tag)
     dt, ne = metadata[:2]
     if ne is None and dt is None:
         return self.comm.recv(source=source, tag=tag)
     arr = np.empty(ne, dtype=dt)
     if len(metadata) == 4:
         registry = UnitRegistry(lut=metadata[3], add_default_symbols=False)
         arr = YTArray(arr, metadata[2], registry=registry)
     tmp = arr.view(self.__tocast)
     self.comm.Recv([tmp, MPI.CHAR], source=source, tag=tag)
     return arr
 def __init__(self, hubble_constant = 0.71,
              omega_matter = 0.27,
              omega_lambda = 0.73,
              omega_curvature = 0.0,
              unit_registry = None,
              unit_system = "cgs",
              use_dark_factor = False,
              w_0 = -1.0,
              w_a = 0.0):
     self.omega_matter = float(omega_matter)
     self.omega_lambda = float(omega_lambda)
     self.omega_curvature = float(omega_curvature)
     if unit_registry is None:
         unit_registry = UnitRegistry()
         unit_registry.modify("h", hubble_constant)
         for my_unit in ["m", "pc", "AU", "au"]:
             new_unit = "%scm" % my_unit
             # technically not true, but distances here are actually comoving
             unit_registry.add(new_unit, unit_registry.lut[my_unit][0],
                               dimensions.length, "\\rm{%s}/(1+z)" % my_unit)
     self.unit_registry = unit_registry
     self.hubble_constant = self.quan(hubble_constant, "100*km/s/Mpc")
     self.unit_system = unit_system
     
     # For non-standard dark energy. If false, use default cosmological constant
     # This only affects the expansion_factor function.
     self.use_dark_factor = use_dark_factor
     self.w_0 = w_0
     self.w_a = w_a
 def _create_unit_registry(self):
     self.unit_registry = UnitRegistry()
     import yt.units.dimensions as dimensions
     self.unit_registry.add("code_length", 1.0, dimensions.length)
     self.unit_registry.add("code_mass", 1.0, dimensions.mass)
     self.unit_registry.add("code_density", 1.0, dimensions.density)
     self.unit_registry.add("code_time", 1.0, dimensions.time)
     self.unit_registry.add("code_magnetic", 1.0, dimensions.magnetic_field)
     self.unit_registry.add("code_temperature", 1.0, dimensions.temperature)
     self.unit_registry.add("code_pressure", 1.0, dimensions.pressure)
     self.unit_registry.add("code_velocity", 1.0, dimensions.velocity)
     self.unit_registry.add("code_metallicity", 1.0,
                            dimensions.dimensionless)
Example #20
    def _parse_parameter_file(self):
        fh = h5py.File(self.filename, "r")

        for attr in ["hubble_constant", "omega_matter", "omega_lambda"]:
            setattr(self, attr, fh.attrs[attr])

        my_ur = UnitRegistry.from_json(parse_h5_attr(fh, "unit_registry_json"))
        right = _hdf5_yt_attr(fh, "domain_right_edge", unit_registry=my_ur)
        left = _hdf5_yt_attr(fh, "domain_left_edge", unit_registry=my_ur)
        # Drop the "cm" suffix because all lengths will
        # be in comoving units.
        self.box_size = self.quan((right - left)[0].to("Mpccm/h"), "Mpc/h")
        fh.close()
Example #21
 def _parse_parameter_file(self):
     self._prefix = \
       self.filename[:self.filename.rfind(self._suffix)]
     fh = h5py.File(self.filename, "r")
     for attr in ["hubble_constant", "omega_matter", "omega_lambda"]:
         setattr(self, attr, fh.attrs[attr])
     if "unit_registry_json" in fh.attrs:
         self.unit_registry = \
           UnitRegistry.from_json(
               parse_h5_attr(fh, "unit_registry_json"))
     self.box_size = _hdf5_yt_attr(fh,
                                   "box_size",
                                   unit_registry=self.unit_registry)
     self.field_info.update(json.loads(parse_h5_attr(fh, "field_info")))
     self.field_list = list(self.field_info.keys())
     fh.close()
Example #22
    def _set_new_unit_registry(self, input_registry):
        self.unit_registry = UnitRegistry(add_default_symbols=False,
                                          lut=input_registry.lut)

        # Validate that the new unit registry makes sense
        current_scaling = self.unit_registry['unitary'][0]
        if current_scaling != input_registry['unitary'][0]:
            for source in self.sources.items():
                data_source = getattr(source, 'data_source', None)
                if data_source is None:
                    continue
                scaling = data_source.ds.unit_registry['unitary'][0]
                if scaling != current_scaling:
                    raise NotImplementedError(
                        "Simultaneously rendering data from datasets with "
                        "different units is not supported")
Example #23
 def __init__(self, hubble_constant = 0.71,
              omega_matter = 0.27,
              omega_lambda = 0.73,
              omega_curvature = 0.0,
              unit_registry = None):
     self.omega_matter = omega_matter
     self.omega_lambda = omega_lambda
     self.omega_curvature = omega_curvature
     if unit_registry is None:
         unit_registry = UnitRegistry()
         unit_registry.modify("h", hubble_constant)
         for my_unit in ["m", "pc", "AU", "au"]:
             new_unit = "%scm" % my_unit
             # technically not true, but distances here are actually comoving
             unit_registry.add(new_unit, unit_registry.lut[my_unit][0],
                               dimensions.length, "\\rm{%s}/(1+z)" % my_unit)
     self.unit_registry = unit_registry
     self.hubble_constant = self.quan(hubble_constant, "100*km/s/Mpc")
Example #24
    def __init__(
        self,
        hubble_constant=0.71,
        omega_matter=0.27,
        omega_lambda=0.73,
        omega_radiation=0.0,
        omega_curvature=0.0,
        unit_registry=None,
        unit_system="cgs",
        use_dark_factor=False,
        w_0=-1.0,
        w_a=0.0,
    ):
        self.omega_matter = float(omega_matter)
        self.omega_radiation = float(omega_radiation)
        self.omega_lambda = float(omega_lambda)
        self.omega_curvature = float(omega_curvature)
        hubble_constant = float(hubble_constant)
        if unit_registry is None:
            unit_registry = UnitRegistry(unit_system=unit_system)
            unit_registry.add("h", hubble_constant, dimensions.dimensionless,
                              r"h")
            for my_unit in ["m", "pc", "AU", "au"]:
                new_unit = f"{my_unit}cm"
                my_u = Unit(my_unit, registry=unit_registry)
                # technically not true, but distances here are actually comoving
                unit_registry.add(
                    new_unit,
                    my_u.base_value,
                    dimensions.length,
                    "\\rm{%s}/(1+z)" % my_unit,
                    prefixable=True,
                )
        self.unit_registry = unit_registry
        self.hubble_constant = self.quan(hubble_constant, "100*km/s/Mpc")
        self.unit_system = unit_system

        # For non-standard dark energy. If false, use default cosmological constant
        # This only affects the expansion_factor function.
        self.use_dark_factor = use_dark_factor
        self.w_0 = w_0
        self.w_a = w_a
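A sketch of what the comoving ("...cm") registrations above enable, using yt's Cosmology class; the numbers are illustrative and, as the comments note, the stored comoving scaling is simply the z = 0 value:

from yt.utilities.cosmology import Cosmology

co = Cosmology(hubble_constant=0.7, omega_matter=0.3, omega_lambda=0.7)
d = co.quan(100.0, "Mpccm/h")     # comoving Mpc/h, understood by this registry
print(d.in_units("Mpc"))          # ~142.9 Mpc for h = 0.7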
    def __init__(self, simulation_ds=None, halos_ds=None, make_analytic=True,
                 omega_matter0=0.2726, omega_lambda0=0.7274, omega_baryon0=0.0456,
                 hubble0=0.704, sigma8=0.86, primordial_index=1.0, this_redshift=0,
                 log_mass_min=None, log_mass_max=None, num_sigma_bins=360,
                 fitting_function=4):
        self.simulation_ds = simulation_ds
        self.halos_ds = halos_ds
        self.omega_matter0 = omega_matter0
        self.omega_lambda0 = omega_lambda0
        self.omega_baryon0 = omega_baryon0
        self.hubble0 = hubble0
        self.sigma8 = sigma8
        self.primordial_index = primordial_index
        self.this_redshift = this_redshift
        self.log_mass_min = log_mass_min
        self.log_mass_max = log_mass_max
        self.num_sigma_bins = num_sigma_bins
        self.fitting_function = fitting_function
        self.make_analytic = make_analytic
        self.make_simulated = False
        """
        If we want to make an analytic mass function, grab what we can from either the 
        halo file or the data set, and make sure that the user supplied everything else
        that is needed.
        """
        # If we don't have any datasets, make the analytic function with user values
        if simulation_ds is None and halos_ds is None:
            # Set a reasonable mass min and max if none were provided
            if log_mass_min is None:
                self.log_mass_min = 5
            if log_mass_max is None:
                self.log_mass_max = 16
        # If we're making the analytic function...
        if self.make_analytic == True:
            # Try to set cosmological parameters from the simulation dataset
            if simulation_ds is not None:
                self.omega_matter0 = self.simulation_ds.omega_matter
                self.omega_lambda0 = self.simulation_ds.omega_lambda
                self.hubble0 = self.simulation_ds.hubble_constant
                self.this_redshift = self.simulation_ds.current_redshift
                # Set a reasonable mass min and max if none were provided
                if log_mass_min is None:
                    self.log_mass_min = 5
                if log_mass_max is None:
                    self.log_mass_max = 16
            # If we have a halo dataset but not a simulation dataset, use that instead
            if simulation_ds is None and halos_ds is not None:
                self.omega_matter0 = self.halos_ds.omega_matter
                self.omega_lambda0 = self.halos_ds.omega_lambda
                self.hubble0 = self.halos_ds.hubble_constant
                self.this_redshift = self.halos_ds.current_redshift
                # If the user didn't specify mass min and max, set them from the halos
                if log_mass_min is None:
                    self.set_mass_from_halos("min_mass")
                if log_mass_max is None:
                    self.set_mass_from_halos("max_mass")
            # Do the calculations.
            self.sigmaM()
            self.dndm()
            # Return the mass array in M_solar rather than M_solar/h
            self.masses_analytic = YTArray(self.masses_analytic/self.hubble0, "Msun")
            # The halo arrays will already have yt units, but the analytic forms do 
            # not. If a dataset has been provided, use that to give them units. At the
            # same time, convert to comoving (Mpc)^-3
            if simulation_ds is not None:
                self.n_cumulative_analytic = simulation_ds.arr(self.n_cumulative_analytic, 
                                                          "(Mpccm)**(-3)")
            elif halos_ds is not None:
                self.n_cumulative_analytic = halos_ds.arr(self.n_cumulative_analytic, 
                                                          "(Mpccm)**(-3)")
            else:
                from yt.units.unit_registry import UnitRegistry
                from yt.units.dimensions import length
                hmf_registry = UnitRegistry()
                for my_unit in ["m", "pc", "AU", "au"]:
                    new_unit = "%scm" % my_unit
                    hmf_registry.add(new_unit, 
                                     hmf_registry.lut[my_unit][0] / 
                                     (1 + self.this_redshift),
                                     length, "\\rm{%s}/(1+z)" % my_unit)                         
                self.n_cumulative_analytic = YTArray(self.n_cumulative_analytic, 
                                                     "(Mpccm)**(-3)", 
                                                     registry=hmf_registry) 


        """
        If a halo file has been supplied, make a mass function for the simulated halos.
        """
        if halos_ds is not None:
            # Used to check if a simulated halo mass function exists to write out
            self.make_simulated=True
            # Calculate the simulated halo mass function
            self.create_sim_hmf()
class EnzoSimulation(SimulationTimeSeries):
    r"""
    Initialize an Enzo Simulation object.

    Upon creation, the parameter file is parsed and the time and redshift
    are calculated and stored in all_outputs.  A time units dictionary is
    instantiated to allow for time outputs to be requested with physical
    time units.  The get_time_series method can be used to generate a
    DatasetSeries object.

    parameter_filename : str
        The simulation parameter file.
    find_outputs : bool
        If True, subdirectories within the GlobalDir directory are
        searched one by one for datasets.  Time and redshift
        information are gathered by temporarily instantiating each
        dataset.  This can be used when simulation data was created
        in a non-standard way, making it difficult to guess the
        corresponding time and redshift information.
        Default: False.

    Examples
    --------
    >>> import yt
    >>> es = yt.simulation("my_simulation.par", "Enzo")
    >>> es.get_time_series()
    >>> for ds in es:
    ...     print(ds.current_time)

    """
    def __init__(self, parameter_filename, find_outputs=False):
        self.simulation_type = "grid"
        self.key_parameters = ["stop_cycle"]
        SimulationTimeSeries.__init__(self,
                                      parameter_filename,
                                      find_outputs=find_outputs)

    def _set_units(self):
        self.unit_registry = UnitRegistry()
        self.unit_registry.lut["code_time"] = (1.0, dimensions.time)
        if self.cosmological_simulation:
            # Instantiate EnzoCosmology object for units and time conversions.
            self.cosmology = \
              EnzoCosmology(self.parameters['CosmologyHubbleConstantNow'],
                            self.parameters['CosmologyOmegaMatterNow'],
                            self.parameters['CosmologyOmegaLambdaNow'],
                            0.0, self.parameters['CosmologyInitialRedshift'],
                            unit_registry=self.unit_registry)

            self.time_unit = self.cosmology.time_unit.in_units("s")
            self.unit_registry.modify("h", self.hubble_constant)
            # Comoving lengths
            for my_unit in ["m", "pc", "AU", "au"]:
                new_unit = "%scm" % my_unit
                # technically not true, but should be ok
                self.unit_registry.add(new_unit,
                                       self.unit_registry.lut[my_unit][0],
                                       dimensions.length,
                                       "\\rm{%s}/(1+z)" % my_unit)
            self.length_unit = self.quan(self.box_size,
                                         "Mpccm / h",
                                         registry=self.unit_registry)
            self.box_size = self.length_unit
        else:
            self.time_unit = self.quan(self.parameters["TimeUnits"], "s")
        self.unit_registry.modify("code_time", self.time_unit)

    def get_time_series(self,
                        time_data=True,
                        redshift_data=True,
                        initial_time=None,
                        final_time=None,
                        initial_redshift=None,
                        final_redshift=None,
                        initial_cycle=None,
                        final_cycle=None,
                        times=None,
                        redshifts=None,
                        tolerance=None,
                        parallel=True,
                        setup_function=None):
        """
        Instantiate a DatasetSeries object for a set of outputs.

        If no additional keywords are given, a DatasetSeries object will be
        created with all potential datasets created by the simulation.

        Outputs can be gathered by specifying a time or redshift range
        (or combination of time and redshift), with a specific list of
        times or redshifts, a range of cycle numbers (for cycle based
        output), or by simply searching all subdirectories within the
        simulation directory.

        time_data : bool
            Whether or not to include time outputs when gathering
            datasets for time series.
            Default: True.
        redshift_data : bool
            Whether or not to include redshift outputs when gathering
            datasets for time series.
            Default: True.
        initial_time : tuple of type (float, str)
            The earliest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (5.0, "Gyr").  If None, the initial time of the 
            simulation is used.  This can be used in combination with 
            either final_time or final_redshift.
            Default: None.
        final_time : tuple of type (float, str)
            The latest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (13.7, "Gyr"). If None, the final time of the 
            simulation is used.  This can be used in combination with either 
            initial_time or initial_redshift.
            Default: None.
        times : tuple of type (float array, str)
            A list of times for which outputs will be found and the units 
            of those values.  For example, ([0, 1, 2, 3], "s").
            Default: None.
        initial_redshift : float
            The earliest redshift for outputs to be included.  If None,
            the initial redshift of the simulation is used.  This can be
            used in combination with either final_time or
            final_redshift.
            Default: None.
        final_redshift : float
            The latest redshift for outputs to be included.  If None,
            the final redshift of the simulation is used.  This can be
            used in combination with either initial_time or
            initial_redshift.
            Default: None.
        redshifts : array_like
            A list of redshifts for which outputs will be found.
            Default: None.
        initial_cycle : float
            The earliest cycle for outputs to be included.  If None,
            the initial cycle of the simulation is used.  This can
            only be used with final_cycle.
            Default: None.
        final_cycle : float
            The latest cycle for outputs to be included.  If None,
            the final cycle of the simulation is used.  This can
            only be used in combination with initial_cycle.
            Default: None.
        tolerance : float
            Used in combination with "times" or "redshifts" keywords,
            this is the tolerance within which outputs are accepted
            given the requested times or redshifts.  If None, the
            nearest output is always taken.
            Default: None.
        parallel : bool/int
            If True, the generated DatasetSeries will divide the work
            such that a single processor works on each dataset.  If an
            integer is supplied, the work will be divided into that
            number of jobs.
            Default: True.
        setup_function : callable, accepts a ds
            This function will be called whenever a dataset is loaded.

        Examples
        --------

        >>> import yt
        >>> es = yt.simulation("my_simulation.par", "Enzo")
        
        >>> es.get_time_series(initial_redshift=10, final_time=(13.7, "Gyr"),
        ...                    redshift_data=False)

        >>> es.get_time_series(redshifts=[3, 2, 1, 0])

        >>> es.get_time_series(final_cycle=100000)

        >>> # after calling get_time_series
        >>> for ds in es.piter():
        ...     p = ProjectionPlot(ds, 'x', "density")
        ...     p.save()

        >>> # An example using the setup_function keyword
        >>> def print_time(ds):
        ...     print(ds.current_time)
        >>> es.get_time_series(setup_function=print_time)
        >>> for ds in es:
        ...     SlicePlot(ds, "x", "Density").save()

        """

        if (initial_redshift is not None or \
            final_redshift is not None) and \
            not self.cosmological_simulation:
            raise InvalidSimulationTimeSeries(
                "An initial or final redshift has been given for a " +
                "noncosmological simulation.")

        if time_data and redshift_data:
            my_all_outputs = self.all_outputs
        elif time_data:
            my_all_outputs = self.all_time_outputs
        elif redshift_data:
            my_all_outputs = self.all_redshift_outputs
        else:
            raise InvalidSimulationTimeSeries(
                'Both time_data and redshift_data are False.')

        if not my_all_outputs:
            DatasetSeries.__init__(self, outputs=[], parallel=parallel)
            mylog.info("0 outputs loaded into time series.")
            return

        # Apply selection criteria to the set.
        if times is not None:
            my_outputs = self._get_outputs_by_key("time",
                                                  times,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        elif redshifts is not None:
            my_outputs = self._get_outputs_by_key("redshift",
                                                  redshifts,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        elif initial_cycle is not None or final_cycle is not None:
            if initial_cycle is None:
                initial_cycle = 0
            else:
                initial_cycle = max(initial_cycle, 0)
            if final_cycle is None:
                final_cycle = self.parameters['StopCycle']
            else:
                final_cycle = min(final_cycle, self.parameters['StopCycle'])

            my_outputs = my_all_outputs[int(
                ceil(
                    float(initial_cycle) / self.parameters['CycleSkipDataDump']
                )):int(final_cycle / self.parameters['CycleSkipDataDump']) + 1]

        else:
            if initial_time is not None:
                if isinstance(initial_time, float):
                    initial_time = self.quan(initial_time, "code_time")
                elif isinstance(initial_time,
                                tuple) and len(initial_time) == 2:
                    initial_time = self.quan(*initial_time)
                elif not isinstance(initial_time, YTArray):
                    raise RuntimeError(
                        "Error: initial_time must be given as a float or " +
                        "tuple of (value, units).")
            elif initial_redshift is not None:
                my_initial_time = self.cosmology.t_from_z(initial_redshift)
            else:
                my_initial_time = self.initial_time

            if final_time is not None:
                if isinstance(final_time, float):
                    final_time = self.quan(final_time, "code_time")
                elif isinstance(final_time, tuple) and len(final_time) == 2:
                    final_time = self.quan(*final_time)
                elif not isinstance(final_time, YTArray):
                    raise RuntimeError(
                        "Error: final_time must be given as a float or " +
                        "tuple of (value, units).")
                my_final_time = final_time.in_units("s")
            elif final_redshift is not None:
                my_final_time = self.cosmology.t_from_z(final_redshift)
            else:
                my_final_time = self.final_time

            my_initial_time.convert_to_units("s")
            my_final_time.convert_to_units("s")
            my_times = np.array([a['time'] for a in my_all_outputs])
            my_indices = np.digitize([my_initial_time, my_final_time],
                                     my_times)
            if my_initial_time == my_times[my_indices[0] - 1]:
                my_indices[0] -= 1
            my_outputs = my_all_outputs[my_indices[0]:my_indices[1]]

        init_outputs = []
        for output in my_outputs:
            if os.path.exists(output['filename']):
                init_outputs.append(output['filename'])

        DatasetSeries.__init__(self,
                               outputs=init_outputs,
                               parallel=parallel,
                               setup_function=setup_function)
        mylog.info("%d outputs loaded into time series.", len(init_outputs))

    def _parse_parameter_file(self):
        """
        Parses the parameter file and establishes the various
        dictionaries.
        """

        self.conversion_factors = {}
        redshift_outputs = []

        # Let's read the file
        lines = open(self.parameter_filename).readlines()
        for line in (l.strip() for l in lines):
            if '#' in line: line = line[0:line.find('#')]
            if '//' in line: line = line[0:line.find('//')]
            if len(line) < 2: continue
            param, vals = (i.strip() for i in line.split("=", 1))
            # First we try to decipher what type of value it is.
            vals = vals.split()
            # Special case approaching.
            if "(do" in vals: vals = vals[:1]
            if len(vals) == 0:
                pcast = str  # Assume NULL output
            else:
                v = vals[0]
                # Figure out if it's castable to floating point:
                try:
                    float(v)
                except ValueError:
                    pcast = str
                else:
                    if any("." in v or "e" in v for v in vals):
                        pcast = float
                    elif v == "inf":
                        pcast = str
                    else:
                        pcast = int
            # Now we figure out what to do with it.
            if param.endswith("Units") and not param.startswith("Temperature"):
                dataType = param[:-5]
                # This one better be a float.
                self.conversion_factors[dataType] = float(vals[0])
            if param.startswith("CosmologyOutputRedshift["):
                index = param[param.find("[") + 1:param.find("]")]
                redshift_outputs.append({
                    'index': int(index),
                    'redshift': float(vals[0])
                })
            elif len(vals) == 0:
                vals = ""
            elif len(vals) == 1:
                vals = pcast(vals[0])
            else:
                vals = np.array([pcast(i) for i in vals if i != "-99999"])
            self.parameters[param] = vals
        self.refine_by = self.parameters["RefineBy"]
        self.dimensionality = self.parameters["TopGridRank"]
        if self.dimensionality > 1:
            self.domain_dimensions = self.parameters["TopGridDimensions"]
            if len(self.domain_dimensions) < 3:
                tmp = self.domain_dimensions.tolist()
                tmp.append(1)
                self.domain_dimensions = np.array(tmp)
            self.domain_left_edge = np.array(self.parameters["DomainLeftEdge"],
                                             "float64").copy()
            self.domain_right_edge = np.array(
                self.parameters["DomainRightEdge"], "float64").copy()
        else:
            self.domain_left_edge = np.array(self.parameters["DomainLeftEdge"],
                                             "float64")
            self.domain_right_edge = np.array(
                self.parameters["DomainRightEdge"], "float64")
            self.domain_dimensions = np.array(
                [self.parameters["TopGridDimensions"], 1, 1])

        if self.parameters["ComovingCoordinates"]:
            cosmo_attr = {
                'box_size': 'CosmologyComovingBoxSize',
                'omega_lambda': 'CosmologyOmegaLambdaNow',
                'omega_matter': 'CosmologyOmegaMatterNow',
                'hubble_constant': 'CosmologyHubbleConstantNow',
                'initial_redshift': 'CosmologyInitialRedshift',
                'final_redshift': 'CosmologyFinalRedshift'
            }
            self.cosmological_simulation = 1
            for a, v in cosmo_attr.items():
                if not v in self.parameters:
                    raise MissingParameter(self.parameter_filename, v)
                setattr(self, a, self.parameters[v])
        else:
            self.cosmological_simulation = 0
            self.omega_lambda = self.omega_matter = \
                self.hubble_constant = 0.0

        # make list of redshift outputs
        self.all_redshift_outputs = []
        if not self.cosmological_simulation: return
        for output in redshift_outputs:
            output['filename'] = os.path.join(
                self.parameters['GlobalDir'], "%s%04d" %
                (self.parameters['RedshiftDumpDir'], output['index']),
                "%s%04d" %
                (self.parameters['RedshiftDumpName'], output['index']))
            del output['index']
        self.all_redshift_outputs = redshift_outputs

    def _calculate_time_outputs(self):
        """
        Calculate time outputs and their redshifts if cosmological.
        """

        self.all_time_outputs = []
        if self.final_time is None or \
            not 'dtDataDump' in self.parameters or \
            self.parameters['dtDataDump'] <= 0.0:
            return []

        index = 0
        current_time = self.initial_time.copy()
        dt_datadump = self.quan(self.parameters['dtDataDump'], "code_time")
        while current_time <= self.final_time + dt_datadump:
            filename = os.path.join(
                self.parameters['GlobalDir'],
                "%s%04d" % (self.parameters['DataDumpDir'], index),
                "%s%04d" % (self.parameters['DataDumpName'], index))

            output = {
                'index': index,
                'filename': filename,
                'time': current_time.copy()
            }
            output['time'] = min(output['time'], self.final_time)
            if self.cosmological_simulation:
                output['redshift'] = self.cosmology.z_from_t(current_time)

            self.all_time_outputs.append(output)
            if np.abs(self.final_time - current_time) / self.final_time < 1e-4:
                break
            current_time += dt_datadump
            index += 1

    def _calculate_cycle_outputs(self):
        """
        Calculate cycle outputs.
        """

        mylog.warn(
            'Calculating cycle outputs.  Dataset times will be unavailable.')

        if self.stop_cycle is None or \
            not 'CycleSkipDataDump' in self.parameters or \
            self.parameters['CycleSkipDataDump'] <= 0.0:
            return []

        self.all_time_outputs = []
        index = 0
        for cycle in range(0, self.stop_cycle + 1,
                           self.parameters['CycleSkipDataDump']):
            filename = os.path.join(
                self.parameters['GlobalDir'],
                "%s%04d" % (self.parameters['DataDumpDir'], index),
                "%s%04d" % (self.parameters['DataDumpName'], index))

            output = {'index': index, 'filename': filename, 'cycle': cycle}
            self.all_time_outputs.append(output)
            index += 1

    def _get_all_outputs(self, find_outputs=False):
        """
        Get all potential datasets and combine into a time-sorted list.
        """

        # Create the set of outputs from which further selection will be done.
        if find_outputs:
            self._find_outputs()

        elif self.parameters['dtDataDump'] > 0 and \
          self.parameters['CycleSkipDataDump'] > 0:
            mylog.info(
                "Simulation %s has both dtDataDump and CycleSkipDataDump set.",
                self.parameter_filename)
            mylog.info("    Unable to calculate datasets.  " +
                       "Attempting to search in the current directory")
            self._find_outputs()

        else:
            # Get all time or cycle outputs.
            if self.parameters['CycleSkipDataDump'] > 0:
                self._calculate_cycle_outputs()
            else:
                self._calculate_time_outputs()

            # Calculate times for redshift outputs.
            if self.cosmological_simulation:
                for output in self.all_redshift_outputs:
                    output["time"] = self.cosmology.t_from_z(
                        output["redshift"])
                self.all_redshift_outputs.sort(key=lambda obj: obj["time"])

            self.all_outputs = self.all_time_outputs + self.all_redshift_outputs
            if self.parameters['CycleSkipDataDump'] <= 0:
                self.all_outputs.sort(key=lambda obj: obj['time'].to_ndarray())

    def _calculate_simulation_bounds(self):
        """
        Figure out the starting and stopping time and redshift for the simulation.
        """

        if 'StopCycle' in self.parameters:
            self.stop_cycle = self.parameters['StopCycle']

        # Convert initial/final redshifts to times.
        if self.cosmological_simulation:
            self.initial_time = self.cosmology.t_from_z(self.initial_redshift)
            self.initial_time.units.registry = self.unit_registry
            self.final_time = self.cosmology.t_from_z(self.final_redshift)
            self.final_time.units.registry = self.unit_registry

        # If not a cosmology simulation, figure out the stopping criteria.
        else:
            if 'InitialTime' in self.parameters:
                self.initial_time = self.quan(self.parameters['InitialTime'],
                                              "code_time")
            else:
                self.initial_time = self.quan(0., "code_time")

            if 'StopTime' in self.parameters:
                self.final_time = self.quan(self.parameters['StopTime'],
                                            "code_time")
            else:
                self.final_time = None
            if not ('StopTime' in self.parameters
                    or 'StopCycle' in self.parameters):
                raise NoStoppingCondition(self.parameter_filename)
            if self.final_time is None:
                mylog.warn(
                    "Simulation %s has no stop time set, stopping condition " +
                    "will be based only on cycles.", self.parameter_filename)

    def _set_parameter_defaults(self):
        """
        Set some default parameters to avoid problems if they are not in the parameter file.
        """

        self.parameters['GlobalDir'] = self.directory
        self.parameters['DataDumpName'] = "data"
        self.parameters['DataDumpDir'] = "DD"
        self.parameters['RedshiftDumpName'] = "RedshiftOutput"
        self.parameters['RedshiftDumpDir'] = "RD"
        self.parameters['ComovingCoordinates'] = 0
        self.parameters['TopGridRank'] = 3
        self.parameters['DomainLeftEdge'] = np.zeros(
            self.parameters['TopGridRank'])
        self.parameters['DomainRightEdge'] = np.ones(
            self.parameters['TopGridRank'])
        self.parameters['RefineBy'] = 2  # technically not the enzo default
        self.parameters['StopCycle'] = 100000
        self.parameters['dtDataDump'] = 0.
        self.parameters['CycleSkipDataDump'] = 0.
        self.parameters['TimeUnits'] = 1.

    def _find_outputs(self):
        """
        Search for directories matching the data dump keywords.
        If found, get dataset times by opening the ds.
        """

        # look for time outputs.
        potential_time_outputs = \
          glob.glob(os.path.join(self.parameters['GlobalDir'],
                                 "%s*" % self.parameters['DataDumpDir']))
        self.all_time_outputs = \
          self._check_for_outputs(potential_time_outputs)
        self.all_time_outputs.sort(key=lambda obj: obj['time'])

        # look for redshift outputs.
        potential_redshift_outputs = \
          glob.glob(os.path.join(self.parameters['GlobalDir'],
                                 "%s*" % self.parameters['RedshiftDumpDir']))
        self.all_redshift_outputs = \
          self._check_for_outputs(potential_redshift_outputs)
        self.all_redshift_outputs.sort(key=lambda obj: obj['time'])

        self.all_outputs = self.all_time_outputs + self.all_redshift_outputs
        self.all_outputs.sort(key=lambda obj: obj['time'])
        only_on_root(mylog.info, "Located %d total outputs.",
                     len(self.all_outputs))

        # manually set final time and redshift with last output
        if self.all_outputs:
            self.final_time = self.all_outputs[-1]['time']
            if self.cosmological_simulation:
                self.final_redshift = self.all_outputs[-1]['redshift']

    def _check_for_outputs(self, potential_outputs):
        """
        Check a list of files to see if they are valid datasets.
        """

        only_on_root(mylog.info, "Checking %d potential outputs.",
                     len(potential_outputs))

        my_outputs = {}
        for my_storage, output in parallel_objects(potential_outputs,
                                                   storage=my_outputs):
            if self.parameters['DataDumpDir'] in output:
                dir_key = self.parameters['DataDumpDir']
                output_key = self.parameters['DataDumpName']
            else:
                dir_key = self.parameters['RedshiftDumpDir']
                output_key = self.parameters['RedshiftDumpName']
            index = output[output.find(dir_key) + len(dir_key):]
            filename = os.path.join(self.parameters['GlobalDir'],
                                    "%s%s" % (dir_key, index),
                                    "%s%s" % (output_key, index))
            if os.path.exists(filename):
                try:
                    ds = load(filename)
                    if ds is not None:
                        my_storage.result = {
                            'filename': filename,
                            'time': ds.current_time.in_units("s")
                        }
                        if ds.cosmological_simulation:
                            my_storage.result['redshift'] = ds.current_redshift
                except YTOutputNotIdentified:
                    mylog.error('Failed to load %s', filename)
        my_outputs = [my_output for my_output in my_outputs.values() \
                      if my_output is not None]

        return my_outputs

    def _write_cosmology_outputs(self,
                                 filename,
                                 outputs,
                                 start_index,
                                 decimals=3):
        """
        Write cosmology output parameters for a cosmology splice.
        """

        mylog.info("Writing redshift output list to %s.", filename)
        f = open(filename, 'w')
        for q, output in enumerate(outputs):
            z_string = "%%s[%%d] = %%.%df" % decimals
            f.write(
                ("CosmologyOutputRedshift[%d] = %." + str(decimals) + "f\n") %
                ((q + start_index), output['redshift']))
        f.close()
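For reference, a sketch of the lines this loop writes, reproduced outside the class with made-up redshifts and start_index=5, decimals=3:

for q, output in enumerate([{'redshift': 3.0}, {'redshift': 2.5}]):
    print("CosmologyOutputRedshift[%d] = %.3f" % (q + 5, output['redshift']))
# CosmologyOutputRedshift[5] = 3.000
# CosmologyOutputRedshift[6] = 2.500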
class EnzoSimulation(SimulationTimeSeries):
    r"""
    Initialize an Enzo Simulation object.

    Upon creation, the parameter file is parsed and the time and redshift
    are calculated and stored in all_outputs.  A time units dictionary is
    instantiated to allow for time outputs to be requested with physical
    time units.  The get_time_series method can be used to generate a
    DatasetSeries object.

    parameter_filename : str
        The simulation parameter file.
    find_outputs : bool
        If True, subdirectories within the GlobalDir directory are
        searched one by one for datasets.  Time and redshift
        information are gathered by temporarily instantiating each
        dataset.  This can be used when simulation data was created
        in a non-standard way, making it difficult to guess the
        corresponding time and redshift information.
        Default: False.

    Examples
    --------
    >>> import yt
    >>> es = yt.simulation("my_simulation.par", "Enzo")
    >>> es.get_time_series()
    >>> for ds in es:
    ...     print(ds.current_time)

    """

    def __init__(self, parameter_filename, find_outputs=False):
        self.simulation_type = "grid"
        self.key_parameters = ["stop_cycle"]
        SimulationTimeSeries.__init__(self, parameter_filename,
                                      find_outputs=find_outputs)

    def _set_units(self):
        self.unit_registry = UnitRegistry()
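        # Seed "code_time" in the lookup table so it can be rescaled with
        # modify() at the end of this method, once the actual time unit
        # (cosmological or TimeUnits-based) is known.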
        self.unit_registry.lut["code_time"] = (1.0, dimensions.time)
        if self.cosmological_simulation:
            # Instantiate EnzoCosmology object for units and time conversions.
            self.cosmology = \
              EnzoCosmology(self.parameters['CosmologyHubbleConstantNow'],
                            self.parameters['CosmologyOmegaMatterNow'],
                            self.parameters['CosmologyOmegaLambdaNow'],
                            0.0, self.parameters['CosmologyInitialRedshift'],
                            unit_registry=self.unit_registry)

            self.time_unit = self.cosmology.time_unit.in_units("s")
            self.unit_registry.modify("h", self.hubble_constant)
            # Comoving lengths
            for my_unit in ["m", "pc", "AU", "au"]:
                new_unit = "%scm" % my_unit
                # technically not true, but should be ok
                self.unit_registry.add(new_unit, self.unit_registry.lut[my_unit][0],
                                       dimensions.length, "\\rm{%s}/(1+z)" % my_unit)
            self.length_unit = self.quan(self.box_size, "Mpccm / h",
                                         registry=self.unit_registry)
            self.box_size = self.length_unit
        else:
            self.time_unit = self.quan(self.parameters["TimeUnits"], "s")
        self.unit_registry.modify("code_time", self.time_unit)

    def get_time_series(self, time_data=True, redshift_data=True,
                        initial_time=None, final_time=None,
                        initial_redshift=None, final_redshift=None,
                        initial_cycle=None, final_cycle=None,
                        times=None, redshifts=None, tolerance=None,
                        parallel=True, setup_function=None):

        """
        Instantiate a DatasetSeries object for a set of outputs.

        If no additional keywords given, a DatasetSeries object will be
        created with all potential datasets created by the simulation.

        Outputs can be gathered by specifying a time or redshift range
        (or combination of time and redshift), with a specific list of
        times or redshifts, a range of cycle numbers (for cycle based
        output), or by simply searching all subdirectories within the
        simulation directory.

        Parameters
        ----------
        time_data : bool
            Whether or not to include time outputs when gathering
            datasets for time series.
            Default: True.
        redshift_data : bool
            Whether or not to include redshift outputs when gathering
            datasets for time series.
            Default: True.
        initial_time : tuple of type (float, str)
            The earliest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (5.0, "Gyr").  If None, the initial time of the 
            simulation is used.  This can be used in combination with 
            either final_time or final_redshift.
            Default: None.
        final_time : tuple of type (float, str)
            The latest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (13.7, "Gyr"). If None, the final time of the 
            simulation is used.  This can be used in combination with either 
            initial_time or initial_redshift.
            Default: None.
        times : tuple of type (float array, str)
            A list of times for which outputs will be found and the units 
            of those values.  For example, ([0, 1, 2, 3], "s").
            Default: None.
        initial_redshift : float
            The earliest redshift for outputs to be included.  If None,
            the initial redshift of the simulation is used.  This can be
            used in combination with either final_time or
            final_redshift.
            Default: None.
        final_redshift : float
            The latest redshift for outputs to be included.  If None,
            the final redshift of the simulation is used.  This can be
            used in combination with either initial_time or
            initial_redshift.
            Default: None.
        redshifts : array_like
            A list of redshifts for which outputs will be found.
            Default: None.
        initial_cycle : float
            The earliest cycle for outputs to be included.  If None,
            the initial cycle of the simulation is used.  This can
            only be used with final_cycle.
            Default: None.
        final_cycle : float
            The latest cycle for outputs to be included.  If None,
            the final cycle of the simulation is used.  This can
            only be used in combination with initial_cycle.
            Default: None.
        tolerance : float
            Used in combination with "times" or "redshifts" keywords,
            this is the tolerance within which outputs are accepted
            given the requested times or redshifts.  If None, the
            nearest output is always taken.
            Default: None.
        parallel : bool/int
            If True, the generated DatasetSeries will divide the work
            such that a single processor works on each dataset.  If an
            integer is supplied, the work will be divided into that
            number of jobs.
            Default: True.
        setup_function : callable, accepts a ds
            This function will be called whenever a dataset is loaded.

        Examples
        --------

        >>> import yt
        >>> es = yt.simulation("my_simulation.par", "Enzo")
        
        >>> es.get_time_series(initial_redshift=10, final_time=(13.7, "Gyr"),
        ...                    redshift_data=False)

        >>> es.get_time_series(redshifts=[3, 2, 1, 0])

        >>> es.get_time_series(final_cycle=100000)

        >>> # after calling get_time_series
        >>> for ds in es.piter():
        ...     p = ProjectionPlot(ds, 'x', "density")
        ...     p.save()

        >>> # An example using the setup_function keyword
        >>> def print_time(ds):
        ...     print(ds.current_time)
        >>> es.get_time_series(setup_function=print_time)
        >>> for ds in es:
        ...     SlicePlot(ds, "x", "Density").save()

        """

        if (initial_redshift is not None or \
            final_redshift is not None) and \
            not self.cosmological_simulation:
            raise InvalidSimulationTimeSeries(
                "An initial or final redshift has been given for a " +
                "noncosmological simulation.")

        if time_data and redshift_data:
            my_all_outputs = self.all_outputs
        elif time_data:
            my_all_outputs = self.all_time_outputs
        elif redshift_data:
            my_all_outputs = self.all_redshift_outputs
        else:
            raise InvalidSimulationTimeSeries('Both time_data and redshift_data are False.')

        if not my_all_outputs:
            DatasetSeries.__init__(self, outputs=[], parallel=parallel)
            mylog.info("0 outputs loaded into time series.")
            return

        # Apply selection criteria to the set.
        if times is not None:
            my_outputs = self._get_outputs_by_key("time", times,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        elif redshifts is not None:
            my_outputs = self._get_outputs_by_key("redshift", redshifts,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        elif initial_cycle is not None or final_cycle is not None:
            if initial_cycle is None:
                initial_cycle = 0
            else:
                initial_cycle = max(initial_cycle, 0)
            if final_cycle is None:
                final_cycle = self.parameters['StopCycle']
            else:
                final_cycle = min(final_cycle, self.parameters['StopCycle'])

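            # Cycle-based dumps are written every CycleSkipDataDump cycles, so
            # the requested cycle range maps onto list indices by dividing by
            # CycleSkipDataDump.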
            my_outputs = my_all_outputs[int(ceil(float(initial_cycle) /
                                                 self.parameters['CycleSkipDataDump'])):
                                        int(final_cycle /
                                            self.parameters['CycleSkipDataDump']) + 1]

        else:
            if initial_time is not None:
                if isinstance(initial_time, float):
                    initial_time = self.quan(initial_time, "code_time")
                elif isinstance(initial_time, tuple) and len(initial_time) == 2:
                    initial_time = self.quan(*initial_time)
                elif not isinstance(initial_time, YTArray):
                    raise RuntimeError(
                        "Error: initial_time must be given as a float or " +
                        "tuple of (value, units).")
            elif initial_redshift is not None:
                my_initial_time = self.cosmology.t_from_z(initial_redshift)
            else:
                my_initial_time = self.initial_time

            if final_time is not None:
                if isinstance(final_time, float):
                    final_time = self.quan(final_time, "code_time")
                elif isinstance(final_time, tuple) and len(final_time) == 2:
                    final_time = self.quan(*final_time)
                elif not isinstance(final_time, YTArray):
                    raise RuntimeError(
                        "Error: final_time must be given as a float or " +
                        "tuple of (value, units).")
                my_final_time = final_time.in_units("s")
            elif final_redshift is not None:
                my_final_time = self.cosmology.t_from_z(final_redshift)
            else:
                my_final_time = self.final_time

            my_initial_time.convert_to_units("s")
            my_final_time.convert_to_units("s")
            my_times = np.array([a['time'] for a in my_all_outputs])
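            # np.digitize locates the initial and final times within the sorted
            # array of output times; slicing with those indices keeps only the
            # outputs inside the requested window, and the check below
            # re-includes an output that falls exactly on the initial time.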
            my_indices = np.digitize([my_initial_time, my_final_time], my_times)
            if my_initial_time == my_times[my_indices[0] - 1]: my_indices[0] -= 1
            my_outputs = my_all_outputs[my_indices[0]:my_indices[1]]

        init_outputs = []
        for output in my_outputs:
            if os.path.exists(output['filename']):
                init_outputs.append(output['filename'])
            
        DatasetSeries.__init__(self, outputs=init_outputs, parallel=parallel,
                                setup_function=setup_function)
        mylog.info("%d outputs loaded into time series.", len(init_outputs))

    def _parse_parameter_file(self):
        """
        Parses the parameter file and establishes the various
        dictionaries.
        """

        self.conversion_factors = {}
        redshift_outputs = []

        # Let's read the file
        lines = open(self.parameter_filename).readlines()
        for line in (l.strip() for l in lines):
            if '#' in line: line = line[0:line.find('#')]
            if '//' in line: line = line[0:line.find('//')]
            if len(line) < 2: continue
            param, vals = (i.strip() for i in line.split("=", 1))
            # First we try to decipher what type of value it is.
            vals = vals.split()
            # Special case approaching.
            if "(do" in vals: vals = vals[:1]
            if len(vals) == 0:
                pcast = str # Assume NULL output
            else:
                v = vals[0]
                # Figure out if it's castable to floating point:
                try:
                    float(v)
                except ValueError:
                    pcast = str
                else:
                    if any("." in v or "e" in v for v in vals):
                        pcast = float
                    elif v == "inf":
                        pcast = str
                    else:
                        pcast = int
            # Now we figure out what to do with it.
            if param.endswith("Units") and not param.startswith("Temperature"):
                dataType = param[:-5]
                # This one better be a float.
                self.conversion_factors[dataType] = float(vals[0])
            if param.startswith("CosmologyOutputRedshift["):
                index = param[param.find("[")+1:param.find("]")]
                redshift_outputs.append({'index':int(index), 'redshift':float(vals[0])})
            elif len(vals) == 0:
                vals = ""
            elif len(vals) == 1:
                vals = pcast(vals[0])
            else:
                vals = np.array([pcast(i) for i in vals if i != "-99999"])
            self.parameters[param] = vals
        self.refine_by = self.parameters["RefineBy"]
        self.dimensionality = self.parameters["TopGridRank"]
        if self.dimensionality > 1:
            self.domain_dimensions = self.parameters["TopGridDimensions"]
            if len(self.domain_dimensions) < 3:
                tmp = self.domain_dimensions.tolist()
                tmp.append(1)
                self.domain_dimensions = np.array(tmp)
            self.domain_left_edge = np.array(self.parameters["DomainLeftEdge"],
                                             "float64").copy()
            self.domain_right_edge = np.array(self.parameters["DomainRightEdge"],
                                             "float64").copy()
        else:
            self.domain_left_edge = np.array(self.parameters["DomainLeftEdge"],
                                             "float64")
            self.domain_right_edge = np.array(self.parameters["DomainRightEdge"],
                                             "float64")
            self.domain_dimensions = np.array([self.parameters["TopGridDimensions"],1,1])

        if self.parameters["ComovingCoordinates"]:
            cosmo_attr = {'box_size': 'CosmologyComovingBoxSize',
                          'omega_lambda': 'CosmologyOmegaLambdaNow',
                          'omega_matter': 'CosmologyOmegaMatterNow',
                          'hubble_constant': 'CosmologyHubbleConstantNow',
                          'initial_redshift': 'CosmologyInitialRedshift',
                          'final_redshift': 'CosmologyFinalRedshift'}
            self.cosmological_simulation = 1
            for a, v in cosmo_attr.items():
                if v not in self.parameters:
                    raise MissingParameter(self.parameter_filename, v)
                setattr(self, a, self.parameters[v])
        else:
            self.cosmological_simulation = 0
            self.omega_lambda = self.omega_matter = \
                self.hubble_constant = 0.0

        # make list of redshift outputs
        self.all_redshift_outputs = []
        if not self.cosmological_simulation: return
        for output in redshift_outputs:
            output['filename'] = os.path.join(self.parameters['GlobalDir'],
                                              "%s%04d" % (self.parameters['RedshiftDumpDir'],
                                                          output['index']),
                                              "%s%04d" % (self.parameters['RedshiftDumpName'],
                                                          output['index']))
            del output['index']
        self.all_redshift_outputs = redshift_outputs

    def _calculate_time_outputs(self):
        """
        Calculate time outputs and their redshifts if cosmological.
        """

        self.all_time_outputs = []
        if self.final_time is None or \
            'dtDataDump' not in self.parameters or \
            self.parameters['dtDataDump'] <= 0.0: return []

        index = 0
        current_time = self.initial_time.copy()
        dt_datadump = self.quan(self.parameters['dtDataDump'], "code_time")
        while current_time <= self.final_time + dt_datadump:
            filename = os.path.join(self.parameters['GlobalDir'],
                                    "%s%04d" % (self.parameters['DataDumpDir'], index),
                                    "%s%04d" % (self.parameters['DataDumpName'], index))

            output = {'index': index, 'filename': filename, 'time': current_time.copy()}
            output['time'] = min(output['time'], self.final_time)
            if self.cosmological_simulation:
                output['redshift'] = self.cosmology.z_from_t(current_time)

            self.all_time_outputs.append(output)
            if np.abs(self.final_time - current_time) / self.final_time < 1e-4: break
            current_time += dt_datadump
            index += 1

    def _calculate_cycle_outputs(self):
        """
        Calculate cycle outputs.
        """

        mylog.warning('Calculating cycle outputs.  Dataset times will be unavailable.')

        if self.stop_cycle is None or \
            'CycleSkipDataDump' not in self.parameters or \
            self.parameters['CycleSkipDataDump'] <= 0.0: return []

        self.all_time_outputs = []
        index = 0
        for cycle in range(0, self.stop_cycle+1, self.parameters['CycleSkipDataDump']):
            filename = os.path.join(self.parameters['GlobalDir'],
                                    "%s%04d" % (self.parameters['DataDumpDir'], index),
                                    "%s%04d" % (self.parameters['DataDumpName'], index))

            output = {'index': index, 'filename': filename, 'cycle': cycle}
            self.all_time_outputs.append(output)
            index += 1

    def _get_all_outputs(self, find_outputs=False):
        """
        Get all potential datasets and combine into a time-sorted list.
        """

        # Create the set of outputs from which further selection will be done.
        if find_outputs:
            self._find_outputs()

        elif self.parameters['dtDataDump'] > 0 and \
          self.parameters['CycleSkipDataDump'] > 0:
            mylog.info(
                "Simulation %s has both dtDataDump and CycleSkipDataDump set.",
                self.parameter_filename)
            mylog.info(
                "    Unable to calculate datasets.  " +
                "Attempting to search in the current directory.")
            self._find_outputs()

        else:
            # Get all time or cycle outputs.
            if self.parameters['CycleSkipDataDump'] > 0:
                self._calculate_cycle_outputs()
            else:
                self._calculate_time_outputs()

            # Calculate times for redshift outputs.
            if self.cosmological_simulation:
                for output in self.all_redshift_outputs:
                    output["time"] = self.cosmology.t_from_z(output["redshift"])
                self.all_redshift_outputs.sort(key=lambda obj:obj["time"])

            self.all_outputs = self.all_time_outputs + self.all_redshift_outputs
            if self.parameters['CycleSkipDataDump'] <= 0:
                self.all_outputs.sort(key=lambda obj:obj['time'].to_ndarray())

    def _calculate_simulation_bounds(self):
        """
        Figure out the starting and stopping time and redshift for the simulation.
        """

        if 'StopCycle' in self.parameters:
            self.stop_cycle = self.parameters['StopCycle']

        # Convert initial/final redshifts to times.
        if self.cosmological_simulation:
            self.initial_time = self.cosmology.t_from_z(self.initial_redshift)
            self.initial_time.units.registry = self.unit_registry
            self.final_time = self.cosmology.t_from_z(self.final_redshift)
            self.final_time.units.registry = self.unit_registry

        # If not a cosmology simulation, figure out the stopping criteria.
        else:
            if 'InitialTime' in self.parameters:
                self.initial_time = self.quan(self.parameters['InitialTime'], "code_time")
            else:
                self.initial_time = self.quan(0., "code_time")

            if 'StopTime' in self.parameters:
                self.final_time = self.quan(self.parameters['StopTime'], "code_time")
            else:
                self.final_time = None
            if not ('StopTime' in self.parameters or
                    'StopCycle' in self.parameters):
                raise NoStoppingCondition(self.parameter_filename)
            if self.final_time is None:
                mylog.warning(
                    "Simulation %s has no stop time set, stopping condition " +
                    "will be based only on cycles.",
                    self.parameter_filename)

    def _set_parameter_defaults(self):
        """
        Set some default parameters to avoid problems if they are not in the parameter file.
        """

        self.parameters['GlobalDir'] = self.directory
        self.parameters['DataDumpName'] = "data"
        self.parameters['DataDumpDir'] = "DD"
        self.parameters['RedshiftDumpName'] = "RedshiftOutput"
        self.parameters['RedshiftDumpDir'] = "RD"
        self.parameters['ComovingCoordinates'] = 0
        self.parameters['TopGridRank'] = 3
        self.parameters['DomainLeftEdge'] = np.zeros(self.parameters['TopGridRank'])
        self.parameters['DomainRightEdge'] = np.ones(self.parameters['TopGridRank'])
        self.parameters['RefineBy'] = 2 # technically not the enzo default
        self.parameters['StopCycle'] = 100000
        self.parameters['dtDataDump'] = 0.
        self.parameters['CycleSkipDataDump'] = 0.
        self.parameters['TimeUnits'] = 1.

    def _find_outputs(self):
        """
        Search for directories matching the data dump keywords.
        If found, get dataset times by opening the ds.
        """

        # look for time outputs.
        potential_time_outputs = \
          glob.glob(os.path.join(self.parameters['GlobalDir'],
                                 "%s*" % self.parameters['DataDumpDir']))
        self.all_time_outputs = \
          self._check_for_outputs(potential_time_outputs)
        self.all_time_outputs.sort(key=lambda obj: obj['time'])

        # look for redshift outputs.
        potential_redshift_outputs = \
          glob.glob(os.path.join(self.parameters['GlobalDir'],
                                 "%s*" % self.parameters['RedshiftDumpDir']))
        self.all_redshift_outputs = \
          self._check_for_outputs(potential_redshift_outputs)
        self.all_redshift_outputs.sort(key=lambda obj: obj['time'])

        self.all_outputs = self.all_time_outputs + self.all_redshift_outputs
        self.all_outputs.sort(key=lambda obj: obj['time'])
        only_on_root(mylog.info, "Located %d total outputs.", len(self.all_outputs))

        # manually set final time and redshift with last output
        if self.all_outputs:
            self.final_time = self.all_outputs[-1]['time']
            if self.cosmological_simulation:
                self.final_redshift = self.all_outputs[-1]['redshift']

    def _check_for_outputs(self, potential_outputs):
        """
        Check a list of files to see if they are valid datasets.
        """

        only_on_root(mylog.info, "Checking %d potential outputs.", 
                     len(potential_outputs))

        my_outputs = {}
        for my_storage, output in parallel_objects(potential_outputs, 
                                                   storage=my_outputs):
            if self.parameters['DataDumpDir'] in output:
                dir_key = self.parameters['DataDumpDir']
                output_key = self.parameters['DataDumpName']
            else:
                dir_key = self.parameters['RedshiftDumpDir']
                output_key = self.parameters['RedshiftDumpName']
            index = output[output.find(dir_key) + len(dir_key):]
            filename = os.path.join(self.parameters['GlobalDir'],
                                    "%s%s" % (dir_key, index),
                                    "%s%s" % (output_key, index))
            if os.path.exists(filename):
                try:
                    ds = load(filename)
                    if ds is not None:
                        my_storage.result = {'filename': filename,
                                             'time': ds.current_time.in_units("s")}
                        if ds.cosmological_simulation:
                            my_storage.result['redshift'] = ds.current_redshift
                except YTOutputNotIdentified:
                    mylog.error('Failed to load %s', filename)
        my_outputs = [my_output for my_output in my_outputs.values() \
                      if my_output is not None]

        return my_outputs

    def _write_cosmology_outputs(self, filename, outputs, start_index,
                                 decimals=3):
        """
        Write cosmology output parameters for a cosmology splice.
        """

        mylog.info("Writing redshift output list to %s.", filename)
        f = open(filename, 'w')
        for q, output in enumerate(outputs):
            z_string = "%%s[%%d] = %%.%df\n" % decimals
            f.write(z_string %
                    ("CosmologyOutputRedshift", q + start_index,
                     output['redshift']))
        f.close()
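
# A minimal, self-contained sketch of the redshift list format written by the
# _write_cosmology_outputs method above (start_index=0, decimals=3, and
# illustrative redshifts); it produces lines such as
# "CosmologyOutputRedshift[0] = 3.000".
decimals = 3
z_string = "%%s[%%d] = %%.%df\n" % decimals
with open("redshift_outputs_example.txt", "w") as f:
    for q, redshift in enumerate([3.0, 1.0, 0.0]):
        f.write(z_string % ("CosmologyOutputRedshift", q, redshift))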
Example #28
def test_registry_json():
    reg = UnitRegistry()
    json_reg = reg.to_json()
    unserialized_reg = UnitRegistry.from_json(json_reg)

    assert_equal(reg.lut, unserialized_reg.lut)
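
# A short sketch extending the round trip above: a unit added with add() is
# stored in the lut and so survives to_json()/from_json(). Imports assume the
# same yt modules used elsewhere in these examples.
from yt.units import dimensions
from yt.units.unit_registry import UnitRegistry

reg = UnitRegistry()
reg.add("code_length", 1.5, dimensions.length)
restored = UnitRegistry.from_json(reg.to_json())
assert "code_length" in restored.lut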
Example #29
    equivalence_registry
from yt.units.unit_lookup_table import \
    unit_prefixes, prefixable_units, latex_prefixes, \
    default_base_units
from yt.units.unit_registry import \
    UnitRegistry, \
    UnitParseError
from yt.utilities.exceptions import YTUnitsNotReducible

import copy
import token

from sympy import \
    Symbol, Integer, Float, Rational, sqrt, sympify

class InvalidUnitOperation(Exception):
    pass

default_unit_registry = UnitRegistry()

sympy_one = sympify(1)

global_dict = {
    'Symbol': Symbol,
    'Integer': Integer,
    'Float': Float,
    'Rational': Rational,
    'sqrt': sqrt
}

unit_system_registry = {}

def auto_positive_symbol(tokens, local_dict, global_dict):
    """
Example #30
    def _parse_parameter_file(self):
        self.refine_by = 2
        with h5py.File(self.parameter_filename, mode="r") as f:
            for key in f.attrs.keys():
                v = parse_h5_attr(f, key)
                if key == "con_args":
                    try:
                        v = eval(v)
                    except ValueError:
                        # support older ytdata outputs
                        v = v.astype("str")
                    except NameError:
                        # This is the most common error we expect, and it
                        # results from having the eval do a concatenated decoded
                        # set of the values.
                        v = [_.decode("utf8") for _ in v]
                self.parameters[key] = v
            self._with_parameter_file_open(f)

        # if saved, restore unit registry from the json string
        if "unit_registry_json" in self.parameters:
            self.unit_registry = UnitRegistry.from_json(
                self.parameters["unit_registry_json"])
            # reset self.arr and self.quan to use new unit_registry
            self._arr = None
            self._quan = None
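            # make sure the common code units exist even if they were not part
            # of the serialized registry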
            for dim in [
                    "length",
                    "mass",
                    "pressure",
                    "temperature",
                    "time",
                    "velocity",
            ]:
                cu = "code_" + dim
                if cu not in self.unit_registry:
                    self.unit_registry.add(cu, 1.0, getattr(dimensions, dim))
            if "code_magnetic" not in self.unit_registry:
                self.unit_registry.add("code_magnetic", 1.0,
                                       dimensions.magnetic_field)

        # if saved, set unit system
        if "unit_system_name" in self.parameters:
            unit_system = self.parameters["unit_system_name"]
            del self.parameters["unit_system_name"]
        else:
            unit_system = "cgs"
        # reset unit system since we may have a new unit registry
        self._assign_unit_system(unit_system)

        # assign units to parameters that have associated unit string
        del_pars = []
        for par in self.parameters:
            ustr = f"{par}_units"
            if ustr in self.parameters:
                if isinstance(self.parameters[par], np.ndarray):
                    to_u = self.arr
                else:
                    to_u = self.quan
                self.parameters[par] = to_u(self.parameters[par],
                                            self.parameters[ustr])
                del_pars.append(ustr)
        for par in del_pars:
            del self.parameters[par]

        for attr in self._con_attrs:
            try:
                sattr = _set_attrs.get(attr, attr)
                setattr(self, sattr, self.parameters.get(attr))
            except TypeError:
                # some Dataset attributes are properties with setters
                # which may not accept None as an input
                pass

        if self.geometry is None:
            self.geometry = "cartesian"
Example #31
class GadgetSimulation(SimulationTimeSeries):
    r"""
    Initialize a Gadget Simulation object.

    Upon creation, the parameter file is parsed and the time and redshift
    are calculated and stored in all_outputs.  A time units dictionary is
    instantiated to allow for time outputs to be requested with physical
    time units.  The get_time_series can be used to generate a
    DatasetSeries object.

    Parameters
    ----------
    parameter_filename : str
        The simulation parameter file.
    find_outputs : bool
        If True, the OutputDir directory is searched for datasets.  
        Time and redshift information are gathered by temporarily 
        instantiating each dataset.  This can be used when simulation 
        data was created in a non-standard way, making it difficult 
        to guess the corresponding time and redshift information.
        Default: False.

    Examples
    --------
    >>> import yt
    >>> gs = yt.simulation("my_simulation.par", "Gadget")
    >>> gs.get_time_series()
    >>> for ds in gs:
    ...     print(ds.current_time)

    """
    def __init__(self, parameter_filename, find_outputs=False):
        self.simulation_type = "particle"
        self.dimensionality = 3
        SimulationTimeSeries.__init__(self,
                                      parameter_filename,
                                      find_outputs=find_outputs)

    def _set_units(self):
        self.unit_registry = UnitRegistry()
        self.time_unit = self.quan(1.0, "s")
        if self.cosmological_simulation:
            # Instantiate Cosmology object for units and time conversions.
            self.cosmology = \
              Cosmology(hubble_constant=self.hubble_constant,
                        omega_matter=self.omega_matter,
                        omega_lambda=self.omega_lambda,
                        unit_registry=self.unit_registry)
            self.unit_registry.modify("h", self.hubble_constant)
            # Comoving lengths
            for my_unit in ["m", "pc", "AU", "au"]:
                new_unit = "%scm" % my_unit
                # technically not true, but should be ok
                self.unit_registry.add(new_unit,
                                       self.unit_registry.lut[my_unit][0],
                                       dimensions.length,
                                       "\\rm{%s}/(1+z)" % my_unit)
            self.length_unit = self.quan(self.unit_base["UnitLength_in_cm"],
                                         "cmcm / h",
                                         registry=self.unit_registry)
            self.mass_unit = self.quan(self.unit_base["UnitMass_in_g"],
                                       "g / h",
                                       registry=self.unit_registry)
            self.box_size = self.box_size * self.length_unit
            self.domain_left_edge = self.domain_left_edge * self.length_unit
            self.domain_right_edge = self.domain_right_edge * self.length_unit
            self.unit_registry.add("unitary", float(self.box_size.in_base()),
                                   self.length_unit.units.dimensions)
        else:
            # Read time from file for non-cosmological sim
            self.time_unit = self.quan(
                self.unit_base["UnitLength_in_cm"]/ \
                    self.unit_base["UnitVelocity_in_cm_per_s"], "s")
            self.unit_registry.add("code_time", 1.0, dimensions.time)
            self.unit_registry.modify("code_time", self.time_unit)
            # Length
            self.length_unit = self.quan(self.unit_base["UnitLength_in_cm"],
                                         "cm")
            self.unit_registry.add("code_length", 1.0, dimensions.length)
            self.unit_registry.modify("code_length", self.length_unit)

    def get_time_series(self,
                        initial_time=None,
                        final_time=None,
                        initial_redshift=None,
                        final_redshift=None,
                        times=None,
                        redshifts=None,
                        tolerance=None,
                        parallel=True,
                        setup_function=None):
        """
        Instantiate a DatasetSeries object for a set of outputs.

        If no additional keywords given, a DatasetSeries object will be
        created with all potential datasets created by the simulation.

        Outputs can be gathered by specifying a time or redshift range
        (or a combination of time and redshift), with a specific list of
        times or redshifts, or by simply searching all subdirectories
        within the simulation directory.

        Parameters
        ----------
        initial_time : tuple of type (float, str)
            The earliest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (5.0, "Gyr").  If None, the initial time of the 
            simulation is used.  This can be used in combination with 
            either final_time or final_redshift.
            Default: None.
        final_time : tuple of type (float, str)
            The latest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (13.7, "Gyr"). If None, the final time of the 
            simulation is used.  This can be used in combination with either 
            initial_time or initial_redshift.
            Default: None.
        times : tuple of type (float array, str)
            A list of times for which outputs will be found and the units 
            of those values.  For example, ([0, 1, 2, 3], "s").
            Default: None.
        initial_redshift : float
            The earliest redshift for outputs to be included.  If None,
            the initial redshift of the simulation is used.  This can be
            used in combination with either final_time or
            final_redshift.
            Default: None.
        final_redshift : float
            The latest redshift for outputs to be included.  If None,
            the final redshift of the simulation is used.  This can be
            used in combination with either initial_time or
            initial_redshift.
            Default: None.
        redshifts : array_like
            A list of redshifts for which outputs will be found.
            Default: None.
        tolerance : float
            Used in combination with "times" or "redshifts" keywords,
            this is the tolerance within which outputs are accepted
            given the requested times or redshifts.  If None, the
            nearest output is always taken.
            Default: None.
        parallel : bool/int
            If True, the generated DatasetSeries will divide the work
            such that a single processor works on each dataset.  If an
            integer is supplied, the work will be divided into that
            number of jobs.
            Default: True.
        setup_function : callable, accepts a ds
            This function will be called whenever a dataset is loaded.

        Examples
        --------

        >>> import yt
        >>> gs = yt.simulation("my_simulation.par", "Gadget")
        
        >>> gs.get_time_series(initial_redshift=10, final_time=(13.7, "Gyr"))

        >>> gs.get_time_series(redshifts=[3, 2, 1, 0])

        >>> # after calling get_time_series
        >>> for ds in gs.piter():
        ...     p = ProjectionPlot(ds, "x", "density")
        ...     p.save()

        >>> # An example using the setup_function keyword
        >>> def print_time(ds):
        ...     print(ds.current_time)
        >>> gs.get_time_series(setup_function=print_time)
        >>> for ds in gs:
        ...     SlicePlot(ds, "x", "Density").save()

        """

        if (initial_redshift is not None or \
            final_redshift is not None) and \
            not self.cosmological_simulation:
            raise InvalidSimulationTimeSeries(
                "An initial or final redshift has been given for a " +
                "noncosmological simulation.")

        my_all_outputs = self.all_outputs
        if not my_all_outputs:
            DatasetSeries.__init__(self,
                                   outputs=[],
                                   parallel=parallel,
                                   unit_base=self.unit_base)
            mylog.info("0 outputs loaded into time series.")
            return

        # Apply selection criteria to the set.
        if times is not None:
            my_outputs = self._get_outputs_by_key("time",
                                                  times,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        elif redshifts is not None:
            my_outputs = self._get_outputs_by_key("redshift",
                                                  redshifts,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        else:
            if initial_time is not None:
                if isinstance(initial_time, float):
                    initial_time = self.quan(initial_time, "code_time")
                elif isinstance(initial_time,
                                tuple) and len(initial_time) == 2:
                    initial_time = self.quan(*initial_time)
                elif not isinstance(initial_time, YTArray):
                    raise RuntimeError(
                        "Error: initial_time must be given as a float or " +
                        "tuple of (value, units).")
            elif initial_redshift is not None:
                my_initial_time = self.cosmology.t_from_z(initial_redshift)
            else:
                my_initial_time = self.initial_time

            if final_time is not None:
                if isinstance(final_time, float):
                    final_time = self.quan(final_time, "code_time")
                elif isinstance(final_time, tuple) and len(final_time) == 2:
                    final_time = self.quan(*final_time)
                elif not isinstance(final_time, YTArray):
                    raise RuntimeError(
                        "Error: final_time must be given as a float or " +
                        "tuple of (value, units).")
                my_final_time = final_time.in_units("s")
            elif final_redshift is not None:
                my_final_time = self.cosmology.t_from_z(final_redshift)
            else:
                my_final_time = self.final_time

            my_initial_time.convert_to_units("s")
            my_final_time.convert_to_units("s")
            my_times = np.array([a["time"] for a in my_all_outputs])
            my_indices = np.digitize([my_initial_time, my_final_time],
                                     my_times)
            if my_initial_time == my_times[my_indices[0] - 1]:
                my_indices[0] -= 1
            my_outputs = my_all_outputs[my_indices[0]:my_indices[1]]

        init_outputs = []
        for output in my_outputs:
            if os.path.exists(output["filename"]):
                init_outputs.append(output["filename"])
        if len(init_outputs) == 0 and len(my_outputs) > 0:
            mylog.warning(
                "Could not find any datasets.  " +
                "Check the value of OutputDir in your parameter file.")

        DatasetSeries.__init__(self,
                               outputs=init_outputs,
                               parallel=parallel,
                               setup_function=setup_function,
                               unit_base=self.unit_base)
        mylog.info("%d outputs loaded into time series.", len(init_outputs))

    def _parse_parameter_file(self):
        """
        Parses the parameter file and establishes the various
        dictionaries.
        """

        self.unit_base = {}

        # Let's read the file
        lines = open(self.parameter_filename).readlines()
        comments = ["%", ";"]
        for line in (l.strip() for l in lines):
            for comment in comments:
                if comment in line: line = line[0:line.find(comment)]
            if len(line) < 2: continue
            param, vals = (i.strip() for i in line.split(None, 1))
            # First we try to decipher what type of value it is.
            vals = vals.split()
            # Special case approaching.
            if "(do" in vals: vals = vals[:1]
            if len(vals) == 0:
                pcast = str  # Assume NULL output
            else:
                v = vals[0]
                # Figure out if it's castable to floating point:
                try:
                    float(v)
                except ValueError:
                    pcast = str
                else:
                    if any("." in v or "e" in v for v in vals):
                        pcast = float
                    elif v == "inf":
                        pcast = str
                    else:
                        pcast = int
            # Now we figure out what to do with it.
            if param.startswith("Unit"):
                self.unit_base[param] = float(vals[0])
            if len(vals) == 0:
                vals = ""
            elif len(vals) == 1:
                vals = pcast(vals[0])
            else:
                vals = np.array([pcast(i) for i in vals])

            self.parameters[param] = vals

        # Domain dimensions for Gadget datasets are always 2x2x2 for octree
        self.domain_dimensions = np.array([2, 2, 2])

        if self.parameters["ComovingIntegrationOn"]:
            cosmo_attr = {
                "box_size": "BoxSize",
                "omega_lambda": "OmegaLambda",
                "omega_matter": "Omega0",
                "hubble_constant": "HubbleParam"
            }
            self.initial_redshift = 1.0 / self.parameters["TimeBegin"] - 1.0
            self.final_redshift = 1.0 / self.parameters["TimeMax"] - 1.0
            self.cosmological_simulation = 1
            for a, v in cosmo_attr.items():
                if v not in self.parameters:
                    raise MissingParameter(self.parameter_filename, v)
                setattr(self, a, self.parameters[v])
            self.domain_left_edge = np.array([0., 0., 0.])
            self.domain_right_edge = \
                np.array([1., 1., 1.]) * self.parameters['BoxSize']
        else:
            self.cosmological_simulation = 0
            self.omega_lambda = self.omega_matter = \
                self.hubble_constant = 0.0

    def _find_data_dir(self):
        """
        Find proper location for datasets.  First look where parameter file
        points, but if this doesn't exist then default to the current 
        directory.
        """
        if self.parameters["OutputDir"].startswith("/"):
            data_dir = self.parameters["OutputDir"]
        else:
            data_dir = os.path.join(self.directory,
                                    self.parameters["OutputDir"])
        if not os.path.exists(data_dir):
            mylog.info("OutputDir not found at %s, instead using %s." %
                       (data_dir, self.directory))
            data_dir = self.directory
        self.data_dir = data_dir

    def _snapshot_format(self, index=None):
        """
        The snapshot filename for a given index.  Modify this for different 
        naming conventions.
        """

        if self.parameters["NumFilesPerSnapshot"] > 1:
            suffix = ".0"
        else:
            suffix = ""
        if self.parameters["SnapFormat"] == 3:
            suffix += ".hdf5"
        if index is None:
            count = "*"
        else:
            count = "%03d" % index
        filename = "%s_%s%s" % (self.parameters["SnapshotFileBase"], count,
                                suffix)
        return os.path.join(self.data_dir, filename)

    def _get_all_outputs(self, find_outputs=False):
        """
        Get all potential datasets and combine into a time-sorted list.
        """

        # Find the data directory where the outputs are
        self._find_data_dir()

        # Create the set of outputs from which further selection will be done.
        if find_outputs:
            self._find_outputs()
        else:
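            # Build the list of output scale factors (or times): either read
            # them from OutputListFilename or generate them from
            # TimeOfFirstSnapshot and TimeBetSnapshot (multiplicative in a for
            # cosmological runs, additive in time otherwise).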
            if self.parameters["OutputListOn"]:
                a_values = [
                    float(a) for a in open(
                        os.path.join(self.data_dir,
                                     self.parameters["OutputListFilename"]),
                        "r").readlines()
                ]
            else:
                a_values = [float(self.parameters["TimeOfFirstSnapshot"])]
                time_max = float(self.parameters["TimeMax"])
                while a_values[-1] < time_max:
                    if self.cosmological_simulation:
                        a_values.append(a_values[-1] *
                                        self.parameters["TimeBetSnapshot"])
                    else:
                        a_values.append(a_values[-1] +
                                        self.parameters["TimeBetSnapshot"])
                if a_values[-1] > time_max:
                    a_values[-1] = time_max

            if self.cosmological_simulation:
                self.all_outputs = \
                  [{"filename": self._snapshot_format(i),
                    "redshift": (1. / a - 1)}
                   for i, a in enumerate(a_values)]

                # Calculate times for redshift outputs.
                for output in self.all_outputs:
                    output["time"] = self.cosmology.t_from_z(
                        output["redshift"])
            else:
                self.all_outputs = \
                  [{"filename": self._snapshot_format(i),
                    "time": self.quan(a, "code_time")}
                   for i, a in enumerate(a_values)]

            self.all_outputs.sort(key=lambda obj: obj["time"].to_ndarray())

    def _calculate_simulation_bounds(self):
        """
        Figure out the starting and stopping time and redshift for the simulation.
        """

        # Convert initial/final redshifts to times.
        if self.cosmological_simulation:
            self.initial_time = self.cosmology.t_from_z(self.initial_redshift)
            self.initial_time.units.registry = self.unit_registry
            self.final_time = self.cosmology.t_from_z(self.final_redshift)
            self.final_time.units.registry = self.unit_registry

        # If not a cosmology simulation, figure out the stopping criteria.
        else:
            if "TimeBegin" in self.parameters:
                self.initial_time = self.quan(self.parameters["TimeBegin"],
                                              "code_time")
            else:
                self.initial_time = self.quan(0., "code_time")

            if "TimeMax" in self.parameters:
                self.final_time = self.quan(self.parameters["TimeMax"],
                                            "code_time")
            else:
                self.final_time = None
            if "TimeMax" not in self.parameters:
                raise NoStoppingCondition(self.parameter_filename)

    def _find_outputs(self):
        """
        Search for directories matching the data dump keywords.
        If found, get dataset times by opening the ds.
        """
        potential_outputs = glob.glob(self._snapshot_format())
        self.all_outputs = self._check_for_outputs(potential_outputs)
        self.all_outputs.sort(key=lambda obj: obj["time"])
        only_on_root(mylog.info, "Located %d total outputs.",
                     len(self.all_outputs))

        # manually set final time and redshift with last output
        if self.all_outputs:
            self.final_time = self.all_outputs[-1]["time"]
            if self.cosmological_simulation:
                self.final_redshift = self.all_outputs[-1]["redshift"]

    def _check_for_outputs(self, potential_outputs):
        r"""
        Check a list of files to see if they are valid datasets.
        """

        only_on_root(mylog.info, "Checking %d potential outputs.",
                     len(potential_outputs))

        my_outputs = {}
        for my_storage, output in parallel_objects(potential_outputs,
                                                   storage=my_outputs):
            if os.path.exists(output):
                try:
                    ds = load(output)
                    if ds is not None:
                        my_storage.result = {
                            "filename": output,
                            "time": ds.current_time.in_units("s")
                        }
                        if ds.cosmological_simulation:
                            my_storage.result["redshift"] = ds.current_redshift
                except YTOutputNotIdentified:
                    mylog.error("Failed to load %s", output)
        my_outputs = [my_output for my_output in my_outputs.values() \
                      if my_output is not None]
        return my_outputs

    def _write_cosmology_outputs(self,
                                 filename,
                                 outputs,
                                 start_index,
                                 decimals=3):
        r"""
        Write cosmology output parameters for a cosmology splice.
        """

        mylog.info("Writing redshift output list to %s.", filename)
        f = open(filename, "w")
        for output in outputs:
            f.write("%f\n" % (1. / (1. + output["redshift"])))
        f.close()
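
# A minimal, self-contained sketch of the file written by the Gadget
# _write_cosmology_outputs above: one expansion factor a = 1/(1+z) per line,
# which is the format read back through OutputListFilename (illustrative
# redshifts).
with open("gadget_outputs_example.txt", "w") as f:
    for redshift in [3.0, 1.0, 0.0]:
        f.write("%f\n" % (1. / (1. + redshift)))
# The file then contains the lines 0.250000, 0.500000 and 1.000000.
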
class Dataset(object):

    default_fluid_type = "gas"
    fluid_types = ("gas", "deposit", "index")
    particle_types = ("io", )  # By default we have an 'all'
    particle_types_raw = ("io", )
    geometry = "cartesian"
    coordinates = None
    max_level = 99
    storage_filename = None
    particle_unions = None
    known_filters = None
    _index_class = None
    field_units = None
    derived_field_list = requires_index("derived_field_list")
    _instantiated = False

    def __new__(cls, filename=None, *args, **kwargs):
        from yt.frontends.stream.data_structures import StreamHandler
        if not isinstance(filename, str):
            obj = object.__new__(cls)
            # The Stream frontend uses a StreamHandler object to pass metadata
            # to __init__.
            is_stream = (hasattr(filename, 'get_fields')
                         and hasattr(filename, 'get_particle_type'))
            if not is_stream:
                obj.__init__(filename, *args, **kwargs)
            return obj
        apath = os.path.abspath(filename)
        #if not os.path.exists(apath): raise IOError(filename)
        if ytcfg.getboolean("yt", "skip_dataset_cache"):
            obj = object.__new__(cls)
        elif apath not in _cached_datasets:
            obj = object.__new__(cls)
            if obj._skip_cache is False:
                _cached_datasets[apath] = obj
        else:
            obj = _cached_datasets[apath]
        return obj

    def __init__(self,
                 filename,
                 dataset_type=None,
                 file_style=None,
                 units_override=None):
        """
        Base class for generating new output types.  Principally consists of
        a *filename* and a *dataset_type* which will be passed on to children.
        """
        # We return early and do NOT initialize a second time if this file has
        # already been initialized.
        if self._instantiated: return
        self.dataset_type = dataset_type
        self.file_style = file_style
        self.conversion_factors = {}
        self.parameters = {}
        self.known_filters = self.known_filters or {}
        self.particle_unions = self.particle_unions or {}
        self.field_units = self.field_units or {}
        if units_override is None:
            units_override = {}
        self.units_override = units_override

        # path stuff
        self.parameter_filename = str(filename)
        self.basename = os.path.basename(filename)
        self.directory = os.path.expanduser(os.path.dirname(filename))
        self.fullpath = os.path.abspath(self.directory)
        self.backup_filename = self.parameter_filename + '_backup.gdf'
        self.read_from_backup = False
        if os.path.exists(self.backup_filename):
            self.read_from_backup = True
        if len(self.directory) == 0:
            self.directory = "."

        # to get the timing right, do this before the heavy lifting
        self._instantiated = time.time()

        self.min_level = 0
        self.no_cgs_equiv_length = False

        self._create_unit_registry()
        self._parse_parameter_file()
        self.set_units()
        self._setup_coordinate_handler()

        # Because we need an instantiated class to check the ds's existence in
        # the cache, we move that check to here from __new__.  This avoids
        # double-instantiation.
        try:
            _ds_store.check_ds(self)
        except NoParameterShelf:
            pass
        self.print_key_parameters()

        self._set_derived_attrs()
        self._setup_classes()

    def _set_derived_attrs(self):
        if self.domain_left_edge is None or self.domain_right_edge is None:
            self.domain_center = np.zeros(3)
            self.domain_width = np.zeros(3)
        else:
            self.domain_center = 0.5 * (self.domain_right_edge +
                                        self.domain_left_edge)
            self.domain_width = self.domain_right_edge - self.domain_left_edge
        if not isinstance(self.current_time, YTQuantity):
            self.current_time = self.quan(self.current_time, "code_time")
        for attr in ("center", "width", "left_edge", "right_edge"):
            n = "domain_%s" % attr
            v = getattr(self, n)
            v = self.arr(v, "code_length")
            setattr(self, n, v)

    def __reduce__(self):
        args = (self._hash(), )
        return (_reconstruct_ds, args)

    def __repr__(self):
        return self.basename

    def _hash(self):
        s = "%s;%s;%s" % (self.basename, self.current_time,
                          self.unique_identifier)
        try:
            import hashlib
            return hashlib.md5(s.encode('utf-8')).hexdigest()
        except ImportError:
            return s.replace(";", "*")

    @property
    def _mrep(self):
        return MinimalDataset(self)

    @property
    def _skip_cache(self):
        return False

    def hub_upload(self):
        self._mrep.upload()

    @classmethod
    def _is_valid(cls, *args, **kwargs):
        return False

    def __getitem__(self, key):
        """ Returns units, parameters, or conversion_factors in that order. """
        return self.parameters[key]

    def __iter__(self):
        for i in self.parameters:
            yield i

    def get_smallest_appropriate_unit(self,
                                      v,
                                      quantity='distance',
                                      return_quantity=False):
        """
        Returns, as a string, the largest whole unit that is smaller than the
        YTQuantity passed to it.

        The quantity keyword can be equal to `distance` or `time`.  In the
        case of distance, the units are: 'Mpc', 'kpc', 'pc', 'au', 'rsun',
        'km', etc.  For time, the units are: 'Myr', 'kyr', 'yr', 'day', 'hr',
        's', 'ms', etc.

        If return_quantity is set to True, it finds the largest YTQuantity
        object with a whole unit and a power of ten as the coefficient, and it
        returns this YTQuantity.
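
        Examples
        --------

        >>> # Illustrative only; assumes a loaded dataset ``ds``.
        >>> ds.get_smallest_appropriate_unit(ds.quan(0.5, 'Mpc'))
        'kpc'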
        """
        good_u = None
        if quantity == 'distance':
            unit_list = [
                'Ppc', 'Tpc', 'Gpc', 'Mpc', 'kpc', 'pc', 'au', 'rsun', 'km',
                'cm', 'um', 'nm', 'pm'
            ]
        elif quantity == 'time':
            unit_list = [
                'Yyr', 'Zyr', 'Eyr', 'Pyr', 'Tyr', 'Gyr', 'Myr', 'kyr', 'yr',
                'day', 'hr', 's', 'ms', 'us', 'ns', 'ps', 'fs'
            ]
        else:
            raise SyntaxError("Specified quantity must be equal to 'distance'"\
                              "or 'time'.")
        for unit in unit_list:
            uq = self.quan(1.0, unit)
            if uq <= v:
                good_u = unit
                break
        if good_u is None and quantity == 'distance': good_u = 'cm'
        if good_u is None and quantity == 'time': good_u = 's'
        if return_quantity:
            unit_index = unit_list.index(good_u)
            # This avoids indexing errors
            if unit_index == 0: return self.quan(1, unit_list[0])
            # Number of orders of magnitude between unit and next one up
            OOMs = np.ceil(
                np.log10(
                    self.quan(1, unit_list[unit_index - 1]) /
                    self.quan(1, unit_list[unit_index])))
            # Backwards order of coefficients (e.g. [100, 10, 1])
            coeffs = 10**np.arange(OOMs)[::-1]
            for j in coeffs:
                uq = self.quan(j, good_u)
                if uq <= v:
                    return uq
        else:
            return good_u

    def has_key(self, key):
        """
        Checks whether the given key is present in the dataset parameters.
        Returns a boolean.

        """
        return key in self.parameters

    _instantiated_index = None

    @property
    def index(self):
        if self._instantiated_index is None:
            if self._index_class is None:
                raise RuntimeError("You should not instantiate Dataset.")
            self._instantiated_index = self._index_class(
                self, dataset_type=self.dataset_type)
            # Now we do things that we need an instantiated index for
            # ...first off, we create our field_info now.
            oldsettings = np.geterr()
            np.seterr(all='ignore')
            self.create_field_info()
            np.seterr(**oldsettings)
        return self._instantiated_index

    _index_proxy = None

    @property
    def h(self):
        if self._index_proxy is None:
            self._index_proxy = IndexProxy(self)
        return self._index_proxy

    hierarchy = h

    @parallel_root_only
    def print_key_parameters(self):
        for a in [
                "current_time", "domain_dimensions", "domain_left_edge",
                "domain_right_edge", "cosmological_simulation"
        ]:
            if not hasattr(self, a):
                mylog.error("Missing %s in parameter file definition!", a)
                continue
            v = getattr(self, a)
            mylog.info("Parameters: %-25s = %s", a, v)
        if hasattr(self, "cosmological_simulation") and \
           getattr(self, "cosmological_simulation"):
            for a in [
                    "current_redshift", "omega_lambda", "omega_matter",
                    "hubble_constant"
            ]:
                if not hasattr(self, a):
                    mylog.error("Missing %s in parameter file definition!", a)
                    continue
                v = getattr(self, a)
                mylog.info("Parameters: %-25s = %s", a, v)

    @parallel_root_only
    def print_stats(self):
        self.index.print_stats()

    @property
    def field_list(self):
        return self.index.field_list

    def create_field_info(self):
        self.field_dependencies = {}
        self.derived_field_list = []
        self.filtered_particle_types = []

        self.field_info = self._field_info_class(self, self.field_list)

        self.coordinates.setup_fields(self.field_info)
        self.field_info.setup_fluid_fields()
        for ptype in self.particle_types:
            self.field_info.setup_particle_fields(ptype)
        if "all" not in self.particle_types:
            mylog.debug("Creating Particle Union 'all'")
            pu = ParticleUnion("all", list(self.particle_types_raw))
            self.add_particle_union(pu)
        self.field_info.setup_extra_union_fields()
        mylog.info("Loading field plugins.")
        self.field_info.load_all_plugins()
        deps, unloaded = self.field_info.check_derived_fields()
        self.field_dependencies.update(deps)

    def setup_deprecated_fields(self):
        from yt.fields.field_aliases import _field_name_aliases
        added = []
        for old_name, new_name in _field_name_aliases:
            try:
                fi = self._get_field_info(new_name)
            except YTFieldNotFound:
                continue
            self.field_info.alias(("gas", old_name), fi.name)
            added.append(("gas", old_name))
        self.field_info.find_dependencies(added)

    def _setup_coordinate_handler(self):
        kwargs = {}
        if isinstance(self.geometry, tuple):
            self.geometry, ordering = self.geometry
            kwargs['ordering'] = ordering
        if isinstance(self.geometry, CoordinateHandler):
            # I kind of dislike this.  The geometry field should always be a
            # string, but the way we're set up with subclassing, we can't
            # mandate that quite the way I'd like.
            self.coordinates = self.geometry
            return
        elif callable(self.geometry):
            cls = self.geometry
        elif self.geometry == "cartesian":
            cls = CartesianCoordinateHandler
        elif self.geometry == "cylindrical":
            cls = CylindricalCoordinateHandler
        elif self.geometry == "polar":
            cls = PolarCoordinateHandler
        elif self.geometry == "spherical":
            cls = SphericalCoordinateHandler
        elif self.geometry == "geographic":
            cls = GeographicCoordinateHandler
        elif self.geometry == "spectral_cube":
            cls = SpectralCubeCoordinateHandler
        else:
            raise YTGeometryNotSupported(self.geometry)
        self.coordinates = cls(self, **kwargs)

    def add_particle_union(self, union):
        # No string lookups here, we need an actual union.
        f = self.particle_fields_by_type
        fields = set_intersection([
            f[s] for s in union
            if s in self.particle_types_raw and len(f[s]) > 0
        ])
        for field in fields:
            units = set([])
            for s in union:
                # First we check our existing fields for units
                funits = self._get_field_info(s, field).units
                # Then we override with field_units settings.
                funits = self.field_units.get((s, field), funits)
                units.add(funits)
            if len(units) == 1:
                self.field_units[union.name, field] = list(units)[0]
        self.particle_types += (union.name, )
        self.particle_unions[union.name] = union
        fields = [(union.name, field) for field in fields]
        self.field_list.extend(fields)
        # Give ourselves a chance to add them here, first, then...
        # ...if we can't find them, we set them up as defaults.
        new_fields = self._setup_particle_types([union.name])
        rv = self.field_info.find_dependencies(new_fields)

    def add_particle_filter(self, filter):
        # This requires an index
        self.index
        # This is a dummy, which we set up to enable passthrough of "all"
        # concatenation fields.
        n = getattr(filter, "name", filter)
        self.known_filters[n] = None
        if isinstance(filter, str):
            used = False
            for f in filter_registry[filter]:
                used = self._setup_filtered_type(f)
                if used:
                    filter = f
                    break
        else:
            used = self._setup_filtered_type(filter)
        if not used:
            self.known_filters.pop(n, None)
            return False
        self.known_filters[filter.name] = filter
        return True

    def _setup_filtered_type(self, filter):
        if not filter.available(self.derived_field_list):
            return False
        fi = self.field_info
        fd = self.field_dependencies
        available = False
        for fn in self.derived_field_list:
            if fn[0] == filter.filtered_type:
                # Now we can add this
                available = True
                self.derived_field_list.append((filter.name, fn[1]))
                fi[filter.name, fn[1]] = filter.wrap_func(fn, fi[fn])
                # Now we append the dependencies
                fd[filter.name, fn[1]] = fd[fn]
        if available:
            self.particle_types += (filter.name, )
            self.filtered_particle_types.append(filter.name)
            new_fields = self._setup_particle_types([filter.name])
            deps, _ = self.field_info.check_derived_fields(new_fields)
            self.field_dependencies.update(deps)
        return available

    def _setup_particle_types(self, ptypes=None):
        df = []
        if ptypes is None: ptypes = self.particle_types_raw
        for ptype in set(ptypes):
            df += self._setup_particle_type(ptype)
        return df

    _last_freq = (None, None)
    _last_finfo = None

    def _get_field_info(self, ftype, fname=None):
        self.index
        if fname is None:
            ftype, fname = "unknown", ftype
        guessing_type = False
        if ftype == "unknown":
            guessing_type = True
            ftype = self._last_freq[0] or ftype
        field = (ftype, fname)
        if field == self._last_freq:
            return self._last_finfo
        if field in self.field_info:
            self._last_freq = field
            self._last_finfo = self.field_info[(ftype, fname)]
            return self._last_finfo

        if fname in self.field_info:
            # Sometimes, if guessing_type == True, this will be switched for
            # the type of field it is.  So we look at the field type and
            # determine if we need to change the type.
            fi = self._last_finfo = self.field_info[fname]
            if fi.particle_type and self._last_freq[0] \
                not in self.particle_types:
                field = "all", field[1]
            elif not fi.particle_type and self._last_freq[0] \
                not in self.fluid_types:
                field = self.default_fluid_type, field[1]
            self._last_freq = field
            return self._last_finfo
        # We also should check "all" for particles, which can show up if you're
        # mixing deposition/gas fields with particle fields.

        if guessing_type:
            to_guess = ["all", self.default_fluid_type] \
                     + list(self.fluid_types) \
                     + list(self.particle_types)
            for ftype in to_guess:
                if (ftype, fname) in self.field_info:
                    self._last_freq = (ftype, fname)
                    self._last_finfo = self.field_info[(ftype, fname)]
                    return self._last_finfo
        raise YTFieldNotFound((ftype, fname), self)

    def _setup_classes(self):
        # Called by subclass
        self.object_types = []
        self.objects = []
        self.plots = []
        for name, cls in sorted(data_object_registry.items()):
            if name in self._index_class._unsupported_objects:
                setattr(self, name, _unsupported_object(self, name))
                continue
            cname = cls.__name__
            if cname.endswith("Base"): cname = cname[:-4]
            self._add_object_class(name, cname, cls,
                                   {'ds': weakref.proxy(self)})
        if self.refine_by != 2 and hasattr(self, 'proj') and \
            hasattr(self, 'overlap_proj'):
            mylog.warning("Refine by something other than two: reverting to" +
                          " overlap_proj")
            self.proj = self.overlap_proj
        if self.dimensionality < 3 and hasattr(self, 'proj') and \
            hasattr(self, 'overlap_proj'):
            mylog.warning("Dimensionality less than 3: reverting to" +
                          " overlap_proj")
            self.proj = self.overlap_proj
        self.object_types.sort()

    def _add_object_class(self, name, class_name, base, dd):
        self.object_types.append(name)
        dd.update({'__doc__': base.__doc__})
        obj = type(class_name, (base, ), dd)
        setattr(self, name, obj)

    def find_max(self, field):
        """
        Returns (value, location) of the maximum of a given field.
        """
        mylog.debug("Searching for maximum value of %s", field)
        source = self.all_data()
        max_val, maxi, mx, my, mz = \
            source.quantities.max_location(field)
        mylog.info("Max Value is %0.5e at %0.16f %0.16f %0.16f", max_val, mx,
                   my, mz)
        return max_val, self.arr([mx, my, mz], 'code_length', dtype="float64")

    def find_min(self, field):
        """
        Returns (value, location) for the minimum of a given field.
        """
        mylog.debug("Searching for minimum value of %s", field)
        source = self.all_data()
        min_val, maxi, mx, my, mz = \
            source.quantities.min_location(field)
        mylog.info("Min Value is %0.5e at %0.16f %0.16f %0.16f", min_val, mx,
                   my, mz)
        return min_val, self.arr([mx, my, mz], 'code_length', dtype="float64")

    def find_field_values_at_point(self, fields, coords):
        """
        Returns the values [field1, field2,...] of the fields at the given
        coordinates. Returns a list of field values in the same order as
        the input *fields*.
        """
        return self.point(coords)[fields]

    def find_field_values_at_points(self, fields, coords):
        """
        Returns the values [field1, field2,...] of the fields at the given
        [(x1, y1, z1), (x2, y2, z2),...] points.  Returns a list of field
        values in the same order as the input *fields*.

        This is quite slow right now as it creates a new data object for each
        point.  If an optimized version exists on the Index object we'll use
        that instead.
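
        Examples
        --------

        >>> # Illustrative sketch; the field name and coordinates below are
        >>> # hypothetical, and unitless coordinates are assumed to be in
        >>> # code units.
        >>> vals = ds.find_field_values_at_points(
        ...     [("gas", "density")], [[0.5, 0.5, 0.5], [0.6, 0.6, 0.6]])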
        """
        if hasattr(self,"index") and \
                hasattr(self.index,"_find_field_values_at_points"):
            return self.index._find_field_values_at_points(fields, coords)

        fields = ensure_list(fields)
        out = np.zeros((len(fields), len(coords)), dtype=np.float64)
        for i, coord in enumerate(coords):
            out[:, i] = self.point(coord)[fields]
        return out

    # Now all the object related stuff
    def all_data(self, find_max=False, **kwargs):
        """
        all_data is a wrapper to the Region object for creating a region
        which covers the entire simulation domain.
        """
        if find_max: c = self.find_max("density")[1]
        else: c = (self.domain_right_edge + self.domain_left_edge) / 2.0
        return self.region(c, self.domain_left_edge, self.domain_right_edge,
                           **kwargs)

    def box(self, left_edge, right_edge, **kwargs):
        """
        box is a wrapper to the Region object for creating a region
        without having to specify a *center* value.  It assumes the center
        is the midpoint between the left_edge and right_edge.
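
        Examples
        --------

        >>> # Illustrative only; unitless edges are interpreted in code units.
        >>> reg = ds.box([0.25, 0.25, 0.25], [0.75, 0.75, 0.75])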
        """
        left_edge = np.array(left_edge)
        right_edge = np.array(right_edge)
        c = (left_edge + right_edge) / 2.0
        return self.region(c, left_edge, right_edge, **kwargs)

    def _setup_particle_type(self, ptype):
        orig = set(self.field_info.items())
        self.field_info.setup_particle_fields(ptype)
        return [n for n, v in set(self.field_info.items()).difference(orig)]

    @property
    def particle_fields_by_type(self):
        fields = defaultdict(list)
        for field in self.field_list:
            if field[0] in self.particle_types_raw:
                fields[field[0]].append(field[1])
        return fields

    @property
    def ires_factor(self):
        o2 = np.log2(self.refine_by)
        if o2 != int(o2):
            raise RuntimeError
        return int(o2)

    def relative_refinement(self, l0, l1):
        return self.refine_by**(l1 - l0)

    def _create_unit_registry(self):
        self.unit_registry = UnitRegistry()
        import yt.units.dimensions as dimensions
        self.unit_registry.add("code_length", 1.0, dimensions.length)
        self.unit_registry.add("code_mass", 1.0, dimensions.mass)
        self.unit_registry.add("code_density", 1.0, dimensions.density)
        self.unit_registry.add("code_time", 1.0, dimensions.time)
        self.unit_registry.add("code_magnetic", 1.0, dimensions.magnetic_field)
        self.unit_registry.add("code_temperature", 1.0, dimensions.temperature)
        self.unit_registry.add("code_pressure", 1.0, dimensions.pressure)
        self.unit_registry.add("code_velocity", 1.0, dimensions.velocity)
        self.unit_registry.add("code_metallicity", 1.0,
                               dimensions.dimensionless)

    def set_units(self):
        """
        Sets up the unit definitions for this dataset, including comoving
        units, code units, and, for cosmological datasets, a cosmology object.

        """
        from yt.units.dimensions import length
        if hasattr(self, "cosmological_simulation") \
           and getattr(self, "cosmological_simulation"):
            # this dataset is cosmological, so add cosmological units.
            self.unit_registry.modify("h", self.hubble_constant)
            # Comoving lengths
            for my_unit in ["m", "pc", "AU", "au"]:
                new_unit = "%scm" % my_unit
                self.unit_registry.add(
                    new_unit, self.unit_registry.lut[my_unit][0] /
                    (1 + self.current_redshift), length,
                    "\\rm{%s}/(1+z)" % my_unit)

        self.set_code_units()

        if hasattr(self, "cosmological_simulation") \
           and getattr(self, "cosmological_simulation"):
            # this dataset is cosmological, add a cosmology object
            setattr(
                self, "cosmology",
                Cosmology(hubble_constant=self.hubble_constant,
                          omega_matter=self.omega_matter,
                          omega_lambda=self.omega_lambda,
                          unit_registry=self.unit_registry))
            setattr(self, "critical_density",
                    self.cosmology.critical_density(self.current_redshift))

    def get_unit_from_registry(self, unit_str):
        """
        Creates a unit object matching the string expression, using this
        dataset's unit registry.

        Parameters
        ----------
        unit_str : str
            string that we can parse for a sympy Expr.
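
        Examples
        --------

        >>> # Illustrative only.
        >>> u = ds.get_unit_from_registry("Mpccm/h")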

        """
        new_unit = Unit(unit_str, registry=self.unit_registry)
        return new_unit

    def set_code_units(self):
        self._set_code_unit_attributes()
        # here we override units, if overrides have been provided.
        self._override_code_units()
        self.unit_registry.modify("code_length", self.length_unit)
        self.unit_registry.modify("code_mass", self.mass_unit)
        self.unit_registry.modify("code_time", self.time_unit)
        if hasattr(self, 'magnetic_unit'):
            # If we do not have this set, but some fields come in in
            # "code_magnetic", this will allow them to remain in that unit.
            self.unit_registry.modify("code_magnetic", self.magnetic_unit)
        vel_unit = getattr(self, "velocity_unit",
                           self.length_unit / self.time_unit)
        pressure_unit = getattr(
            self, "pressure_unit",
            self.mass_unit / (self.length_unit * self.time_unit**2))
        temperature_unit = getattr(self, "temperature_unit", 1.0)
        density_unit = getattr(self, "density_unit",
                               self.mass_unit / self.length_unit**3)
        self.unit_registry.modify("code_velocity", vel_unit)
        self.unit_registry.modify("code_temperature", temperature_unit)
        self.unit_registry.modify("code_pressure", pressure_unit)
        self.unit_registry.modify("code_density", density_unit)
        # domain_width does not yet exist
        if (self.domain_left_edge is not None
                and self.domain_right_edge is not None):
            DW = self.arr(self.domain_right_edge - self.domain_left_edge,
                          "code_length")
            self.unit_registry.add("unitary",
                                   float(DW.max() * DW.units.base_value),
                                   DW.units.dimensions)

    def _override_code_units(self):
        if len(self.units_override) == 0:
            return
        mylog.warning(
            "Overriding code units. This is an experimental and potentially "
            "dangerous option that may yield inconsistent results, and must "
            "be used very carefully, and only if you know what you want from it.")
        for unit, cgs in [("length", "cm"), ("time", "s"), ("mass", "g"),
                          ("velocity", "cm/s"), ("magnetic", "gauss"),
                          ("temperature", "K")]:
            val = self.units_override.get("%s_unit" % unit, None)
            if val is not None:
                if isinstance(val, YTQuantity):
                    val = (val.v, str(val.units))
                elif not isinstance(val, tuple):
                    val = (val, cgs)
                u = getattr(self, "%s_unit" % unit)
                mylog.info("Overriding %s_unit: %g %s -> %g %s.", unit, u.v,
                           u.units, val[0], val[1])
                setattr(self, "%s_unit" % unit, self.quan(val[0], val[1]))

    _arr = None

    @property
    def arr(self):
        """Converts an array into a :class:`yt.units.yt_array.YTArray`

        The returned YTArray will be dimensionless by default, but can be
        cast to arbitrary units using the ``input_units`` keyword argument.

        Parameters
        ----------

        input_array : iterable
            A tuple, list, or array to attach units to
        input_units : String unit specification, unit symbol or astropy object
            The units of the array. Powers must be specified using python syntax
            (cm**3, not cm^3).
        dtype : string or NumPy dtype object
            The dtype of the returned array data

        Examples
        --------

        >>> import yt
        >>> import numpy as np
        >>> ds = yt.load('IsolatedGalaxy/galaxy0030/galaxy0030')
        >>> a = ds.arr([1, 2, 3], 'cm')
        >>> b = ds.arr([4, 5, 6], 'm')
        >>> a + b
        YTArray([ 401.,  502.,  603.]) cm
        >>> b + a
        YTArray([ 4.01,  5.02,  6.03]) m

        Arrays returned by this function know about the dataset's unit system

        >>> a = ds.arr(np.ones(5), 'code_length')
        >>> a.in_units('Mpccm/h')
        YTArray([ 1.00010449,  1.00010449,  1.00010449,  1.00010449,
                 1.00010449]) Mpccm/h

        """

        if self._arr is not None:
            return self._arr
        self._arr = functools.partial(YTArray, registry=self.unit_registry)
        return self._arr

    _quan = None

    @property
    def quan(self):
        """Converts an scalar into a :class:`yt.units.yt_array.YTQuantity`

        The returned YTQuantity will be dimensionless by default, but can be
        cast to arbitray units using the ``input_units`` keyword argument.

        Parameters
        ----------

        input_scalar : an integer or floating point scalar
            The scalar to attach units to
        input_units : String unit specification, unit symbol or astropy object
            The units of the quantity. Powers must be specified using python
            syntax (cm**3, not cm^3).
        dtype : string or NumPy dtype object
            The dtype of the array data.

        Examples
        --------

        >>> import yt
        >>> ds = yt.load('IsolatedGalaxy/galaxy0030/galaxy0030')

        >>> a = ds.quan(1, 'cm')
        >>> b = ds.quan(2, 'm')
        >>> a + b
        201.0 cm
        >>> b + a
        2.01 m

        Quantities created this way automatically know about the unit system
        of the dataset.

        >>> a = ds.quan(5, 'code_length')
        >>> a.in_cgs()
        1.543e+25 cm

        """

        if self._quan is not None:
            return self._quan
        self._quan = functools.partial(YTQuantity, registry=self.unit_registry)
        return self._quan

    def add_field(self, name, function=None, **kwargs):
        """
        Dataset-specific call to add_field

        Add a new field, along with supplemental metadata, to the list of
        available fields.  This respects a number of arguments, all of which
        are passed on to the constructor for
        :class:`~yt.data_objects.api.DerivedField`.

        Parameters
        ----------

        name : str
           is the name of the field.
        function : callable
           A function handle that defines the field.  Should accept
           arguments (field, data)
        units : str
           A plain text string encoding the unit.  Powers must be in
           python syntax (** instead of ^).
        take_log : bool
           Describes whether the field should be logged
        validators : list
           A list of :class:`FieldValidator` objects
        particle_type : bool
           Is this a particle (1D) field?
        vector_field : bool
           Describes the dimensionality of the field.  Currently unused.
        display_name : str
           A name used in the plots
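
        Examples
        --------

        >>> # Illustrative sketch; the field name and function are hypothetical.
        >>> def _density_squared(field, data):
        ...     return data["gas", "density"]**2
        >>> ds.add_field(("gas", "density_squared"), function=_density_squared,
        ...              units="g**2/cm**6")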

        """
        self.index
        override = kwargs.get("force_override", False)
        # Handle the case where the field has already been added.
        if not override and name in self.field_info:
            mylog.warning(
                "Field %s already exists. To override use " +
                "force_override=True.", name)
        self.field_info.add_field(name, function=function, **kwargs)
        self.field_info._show_field_errors.append(name)
        deps, _ = self.field_info.check_derived_fields([name])
        self.field_dependencies.update(deps)

    def add_deposited_particle_field(self, deposit_field, method):
        """Add a new deposited particle field

        Creates a new deposited field based on the particle *deposit_field*.

        Parameters
        ----------

        deposit_field : tuple
           The field name tuple of the particle field the deposited field will
           be created from.  This must be a field name tuple so yt can
           appropriately infer the correct particle type.
        method : one of 'count', 'sum', or 'cic'
           The particle deposition method to use.

        Returns
        -------

        The field name tuple for the newly created field.
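
        Examples
        --------

        >>> # Illustrative; the particle type and field name are hypothetical.
        >>> ds.add_deposited_particle_field(("PartType0", "particle_mass"), "sum")
        ('deposit', 'PartType0_nn_mass')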
        """
        self.index
        if isinstance(deposit_field, tuple):
            ptype, deposit_field = deposit_field[0], deposit_field[1]
        else:
            raise RuntimeError
        units = self.field_info[ptype, deposit_field].units

        def _deposit_field(field, data):
            """
            Create a grid field for particle quantities using the given
            deposition method, weighted by particle mass where appropriate.
            """
            pos = data[ptype, "particle_position"]
            # get back into density
            if method != 'count':
                pden = data[ptype, "particle_mass"]
                top = data.deposit(pos, [data[(ptype, deposit_field)] * pden],
                                   method=method)
                bottom = data.deposit(pos, [pden], method=method)
                top[bottom == 0] = 0.0
                bnz = bottom.nonzero()
                top[bnz] /= bottom[bnz]
                d = data.ds.arr(top, input_units=units)
            else:
                d = data.ds.arr(
                    data.deposit(pos, [data[ptype, deposit_field]],
                                 method=method))
            return d

        name_map = {"cic": "cic", "sum": "nn", "count": "count"}
        field_name = "%s_" + name_map[method] + "_%s"
        field_name = field_name % (ptype, deposit_field.replace(
            'particle_', ''))
        self.add_field(("deposit", field_name),
                       function=_deposit_field,
                       units=units,
                       take_log=False,
                       validators=[ValidateSpatial()])
        return ("deposit", field_name)

    def add_gradient_fields(self, input_field):
        """Add gradient fields.

        Creates four new grid-based fields that represent the components of
        the gradient of an existing field, plus an extra field for the magnitude
        of the gradient. Currently only supported in Cartesian geometries. The
        gradient is computed using second-order centered differences.

        Parameters
        ----------
        input_field : tuple
           The field name tuple of the field the gradient fields will be
           created from.  This must be a field name tuple so yt can
           appropriately infer the correct field type.

        Returns
        -------
        A list of field name tuples for the newly created fields.

        Examples
        --------
        >>> grad_fields = ds.add_gradient_fields(("gas","temperature"))
        >>> print(grad_fields)
        [('gas', 'temperature_gradient_x'),
         ('gas', 'temperature_gradient_y'),
         ('gas', 'temperature_gradient_z'),
         ('gas', 'temperature_gradient_magnitude')]
        """
        self.index
        if isinstance(input_field, tuple):
            ftype, input_field = input_field[0], input_field[1]
        else:
            raise RuntimeError
        units = self.field_info[ftype, input_field].units
        setup_gradient_fields(self.field_info, (ftype, input_field), units)
        # Now we make a list of the fields that were just made, to check them
        # and to return them
        grad_fields = [(ftype, input_field + "_gradient_%s" % suffix)
                       for suffix in "xyz"]
        grad_fields.append((ftype, input_field + "_gradient_magnitude"))
        deps, _ = self.field_info.check_derived_fields(grad_fields)
        self.field_dependencies.update(deps)
        return grad_fields
Example #33
0
class Arbor(object):
    """
    Base class for all Arbor classes.

    Loads a merger-tree output file or a series of halo catalogs
    and creates trees, stored in an array in
    :func:`~ytree.arbor.arbor.Arbor.trees`.
    Arbors can be saved in a universal format with
    :func:`~ytree.arbor.arbor.Arbor.save_arbor`.  Arbors also provide
    convenience functions for creating YTArrays and YTQuantities and
    a cosmology calculator.
    """

    _field_info_class = FieldInfoContainer
    _root_field_io_class = FallbackRootFieldIO
    _tree_field_io_class = TreeFieldIO

    def __init__(self, filename):
        """
        Initialize an Arbor given an input file.
        """

        self.filename = filename
        self.basename = os.path.basename(filename)
        self._parse_parameter_file()
        self._set_units()
        self._root_field_data = FieldContainer(self)
        self._node_io = self._tree_field_io_class(self)
        self._root_io = self._root_field_io_class(self)
        self._get_data_files()
        self._setup_fields()
        self._set_default_selector()

    def _get_data_files(self):
        """
        Get all files that hold field data and make them known
        to the i/o system.
        """
        pass

    def _parse_parameter_file(self):
        """
        Read relevant parameters from parameter file or file header
        and detect fields.
        """
        raise NotImplementedError

    def _plant_trees(self):
        """
        Create the list of root tree nodes.
        """
        raise NotImplementedError

    def is_setup(self, tree_node):
        """
        Return True if arrays of uids and descendent uids have
        been read in.
        """
        return tree_node.root != -1 or \
          tree_node._uids is not None

    def _setup_tree(self, tree_node, **kwargs):
        """
        Create arrays of uids and descids and attach them to the
        root node.
        """
        # skip if this is not a root or if already setup
        if self.is_setup(tree_node):
            return

        idtype = np.int64
        fields, _ = \
          self.field_info.resolve_field_dependencies(["uid", "desc_uid"])
        halo_id_f, desc_id_f = fields
        dtypes = {halo_id_f: idtype, desc_id_f: idtype}
        field_data = self._node_io._read_fields(tree_node,
                                                fields,
                                                dtypes=dtypes,
                                                **kwargs)
        tree_node._uids = field_data[halo_id_f]
        tree_node._descids = field_data[desc_id_f]
        tree_node._tree_size = tree_node._uids.size

    def is_grown(self, tree_node):
        """
        Return True if a tree has been fully assembled, i.e.,
        the hierarchy of ancestor tree nodes has been built.
        """
        return hasattr(tree_node, "treeid")

    def _grow_tree(self, tree_node, **kwargs):
        """
        Create an array of TreeNodes hanging off the root node
        and assemble the tree structure.
        """
        # skip this if not a root or if already grown
        if self.is_grown(tree_node):
            return

        self._setup_tree(tree_node, **kwargs)
        nhalos = tree_node.uids.size
        nodes = np.empty(nhalos, dtype=object)
        nodes[0] = tree_node
        for i in range(1, nhalos):
            nodes[i] = TreeNode(tree_node.uids[i], arbor=self)
        tree_node._nodes = nodes

        # Add tree information to nodes
        uidmap = {}
        for i, node in enumerate(nodes):
            node.treeid = i
            node.root = tree_node
            uidmap[tree_node.uids[i]] = i

        # Link ancestor/descendents
        # Separate loop for trees like lhalotree where descendent
        # can follow in order
        for i, node in enumerate(nodes):
            descid = tree_node.descids[i]
            if descid != -1:
                desc = nodes[uidmap[descid]]
                desc.add_ancestor(node)
                node.descendent = desc

    def _node_io_loop(self, func, *args, **kwargs):
        """
        Call the provided function over a list of nodes.

        If possible, group nodes by common data files to speed
        things up.  This should work like __iter__, except we call
        a function instead of yielding.

        Parameters
        ----------
        func : function
            Function to be called on an array of nodes.
        pbar : optional, string or yt.funcs.TqdmProgressBar
            A progress bar to be updated with each iteration.
            If a string, a progress bar will be created and the
            finish function will be called. If a progress bar is
            provided, the finish function will not be called.
            Default: None (no progress bar).
        root_nodes : optional, array of root TreeNodes
            Array of nodes over which the function will be called.
            If None, the list will be self.trees (i.e., all
            root_nodes).
            Default: None.
        """

        pbar = kwargs.pop("pbar", None)
        root_nodes = kwargs.pop("root_nodes", None)
        if root_nodes is None:
            root_nodes = self.trees
        data_files, node_list = self._node_io_loop_prepare(root_nodes)
        nnodes = sum([nodes.size for nodes in node_list])

        finish = True
        if pbar is None:
            pbar = fake_pbar("", nnodes)
        elif not isinstance(pbar, TqdmProgressBar):
            pbar = get_pbar(pbar, nnodes)
        else:
            finish = False

        for data_file, nodes in zip(data_files, node_list):
            self._node_io_loop_start(data_file)
            for node in nodes:
                func(node, *args, **kwargs)
                pbar.update(1)
            self._node_io_loop_finish(data_file)

        if finish:
            pbar.finish()

    def _node_io_loop_start(self, data_file):
        pass

    def _node_io_loop_finish(self, data_file):
        pass

    def _node_io_loop_prepare(self, root_nodes):
        """
        This is called at the beginning of _node_io_loop.

        In different frontends, this can be used to group nodes by
        common data files.

        Returns
        -------
        list of data files and a list of node arrays

        Each data file corresponds to an array of nodes.
        """

        return [None], [root_nodes]

    def __iter__(self):
        """
        Iterate over all items in the tree list.

        If possible, group nodes by common data files to speed
        things up.
        """

        data_files, node_list = self._node_io_loop_prepare(self.trees)

        for data_file, nodes in zip(data_files, node_list):
            self._node_io_loop_start(data_file)
            for node in nodes:
                yield node
            self._node_io_loop_finish(data_file)

    _trees = None

    @property
    def trees(self):
        """
        Array containing all trees in the arbor.
        """
        if self._trees is None:
            self._plant_trees()
        return self._trees

    def __repr__(self):
        return self.basename

    def __getitem__(self, key):
        return self.query(key)

    def query(self, key):
        """
        If given a string, return an array of field values for the
        roots of all trees.
        If given an integer, return a tree from the list of trees.
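
        Examples
        --------

        >>> # Illustrative; assumes a loaded arbor ``a`` with a "mass" field.
        >>> a["mass"]    # field values for the roots of all trees
        >>> a[0]         # the first tree in the arbor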

        """
        if isinstance(key, string_types):
            if key in ("tree", "prog"):
                raise SyntaxError("Argument must be a field or integer.")
            self._root_io.get_fields(self, fields=[key])
            if self.field_info[key].get("type") == "analysis":
                return self._root_field_data.pop(key)
            return self._root_field_data[key]
        return self.trees[key]

    def __len__(self):
        """
        Return length of tree list.
        """
        return self.trees.size

    _field_info = None

    @property
    def field_info(self):
        """
        A dictionary containing information for each available field.
        """
        if self._field_info is None and \
          self._field_info_class is not None:
            self._field_info = self._field_info_class(self)
        return self._field_info

    @property
    def size(self):
        """
        Return length of tree list.
        """
        return self.trees.size

    _unit_registry = None

    @property
    def unit_registry(self):
        """
        Unit system registry.
        """
        if self._unit_registry is None:
            self._unit_registry = UnitRegistry()
        return self._unit_registry

    @unit_registry.setter
    def unit_registry(self, value):
        self._unit_registry = value
        self._arr = None
        self._quan = None

    _hubble_constant = None

    @property
    def hubble_constant(self):
        """
        Value of the Hubble parameter.
        """
        return self._hubble_constant

    @hubble_constant.setter
    def hubble_constant(self, value):
        self._hubble_constant = value
        # reset the unit registry lut while preserving other changes
        self.unit_registry = UnitRegistry.from_json(
            self.unit_registry.to_json())
        self.unit_registry.modify("h", self.hubble_constant)

    _box_size = None

    @property
    def box_size(self):
        """
        The simulation box size.
        """
        return self._box_size

    @box_size.setter
    def box_size(self, value):
        self._box_size = value
        # set unitary as soon as we know the box size
        self.unit_registry.add("unitary", float(self.box_size.in_base()),
                               length)

    def _setup_fields(self):
        self.derived_field_list = []
        self.analysis_field_list = []
        self.field_info.setup_known_fields()
        self.field_info.setup_aliases()
        self.field_info.setup_derived_fields()

    def _set_units(self):
        """
        Set "cm" units for explicitly comoving.
        Note, we are using comoving units all the time since
        we are dealing with data at multiple redshifts.
        """
        for my_unit in ["m", "pc", "AU", "au"]:
            new_unit = "%scm" % my_unit
            self._unit_registry.add(new_unit,
                                    self._unit_registry.lut[my_unit][0],
                                    length,
                                    self._unit_registry.lut[my_unit][3])

        self.cosmology = Cosmology(hubble_constant=self.hubble_constant,
                                   omega_matter=self.omega_matter,
                                   omega_lambda=self.omega_lambda,
                                   unit_registry=self.unit_registry)

    def set_selector(self, selector, *args, **kwargs):
        r"""
        Sets the tree node selector to be used.

        This sets the manner in which halo progenitors are
        chosen from a list of ancestors.  The most obvious example
        is to select the most massive ancestor.

        Parameters
        ----------
        selector : string
            Name of the selector to be used.

        Any additional arguments and keywords to be provided to
        the selector function should follow.

        Examples
        --------

        >>> import ytree
        >>> a = ytree.load("rockstar_halos/trees/tree_0_0_0.dat")
        >>> a.set_selector("max_field_value", "mass")

        """
        self.selector = tree_node_selector_registry.find(
            selector, *args, **kwargs)

    _arr = None

    @property
    def arr(self):
        """
        Create a YTArray using the Arbor's unit registry.
        """
        if self._arr is not None:
            return self._arr
        self._arr = functools.partial(YTArray, registry=self.unit_registry)
        return self._arr

    _quan = None

    @property
    def quan(self):
        """
        Create a YTQuantity using the Arbor's unit registry.
        """
        if self._quan is not None:
            return self._quan
        self._quan = functools.partial(YTQuantity, registry=self.unit_registry)
        return self._quan

    def _set_default_selector(self):
        """
        Set the default tree node selector as maximum mass.
        """
        self.set_selector("max_field_value", "mass")

    def select_halos(self,
                     criteria,
                     trees=None,
                     select_from="tree",
                     fields=None):
        """
        Select halos from the arbor based on a set of criteria given as a string.
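
        Examples
        --------

        >>> # Illustrative; field names depend on the loaded catalog.
        >>> halos = a.select_halos('tree["tree", "redshift"] > 1.0',
        ...                        fields=["redshift"])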
        """

        if select_from not in ["tree", "prog"]:
            raise SyntaxError(
                "Keyword \"select_from\" must be either \"tree\" or \"prog\".")

        if trees is None:
            trees = self.trees

        if fields is None:
            fields = []

        self._node_io_loop(self._setup_tree,
                           root_nodes=trees,
                           pbar="Setting up trees")
        if fields:
            self._node_io_loop(self._node_io.get_fields,
                               pbar="Getting fields",
                               root_nodes=trees,
                               fields=fields,
                               root_only=False)

        halos = []
        pbar = get_pbar("Selecting halos", self.trees.size)
        for tree in trees:
            my_filter = eval(criteria)
            halos.extend(tree[select_from][my_filter])
            pbar.update(1)
        pbar.finish()
        return np.array(halos)

    def add_analysis_field(self, name, units):
        r"""
        Add an empty field to be filled by analysis operations.

        Parameters
        ----------
        name : string
            Field name.
        units : string
            Field units.

        Examples
        --------

        >>> import ytree
        >>> a = ytree.load("tree_0_0_0.dat")
        >>> a.add_analysis_field("robots", "Msun * kpc")
        >>> # Set field for some halo.
        >>> a[0]["tree"][7]["robots"] = 1979.816
        """

        if name in self.field_info:
            raise ArborFieldAlreadyExists(name, arbor=self)

        self.analysis_field_list.append(name)
        self.field_info[name] = {"type": "analysis", "units": units}

    def add_alias_field(self, alias, field, units=None, force_add=True):
        r"""
        Add a field as an alias to another field.

        Parameters
        ----------
        alias : string
            Alias name.
        field : string
            The field to be aliased.
        units : optional, string
            Units in which the field will be returned.
        force_add : optional, bool
            If True, add field even if it already exists and warn the
            user and raise an exception if dependencies do not exist.
            If False, silently do nothing in both instances.
            Default: True.

        Examples
        --------

        >>> import ytree
        >>> a = ytree.load("tree_0_0_0.dat")
        >>> # "Mvir" exists on disk
        >>> a.add_alias_field("mass", "Mvir", units="Msun")
        >>> print (a["mass"])

        """

        if alias in self.field_info:
            if force_add:
                ftype = self.field_info[alias].get("type", "on-disk")
                if ftype in ["alias", "derived"]:
                    fl = self.derived_field_list
                else:
                    fl = self.field_list
                mylog.warn(("Overriding field \"%s\" that already " +
                            "exists as %s field.") % (alias, ftype))
                fl.pop(fl.index(alias))
            else:
                return

        if field not in self.field_info:
            if force_add:
                raise ArborFieldDependencyNotFound(field, alias, arbor=self)
            else:
                return

        if units is None:
            units = self.field_info[field].get("units")
        self.derived_field_list.append(alias)
        self.field_info[alias] = \
          {"type": "alias", "units": units,
           "dependencies": [field]}
        if "aliases" not in self.field_info[field]:
            self.field_info[field]["aliases"] = []
            self.field_info[field]["aliases"].append(alias)

    def add_derived_field(self,
                          name,
                          function,
                          units=None,
                          description=None,
                          force_add=True):
        r"""
        Add a field that is a function of other fields.

        Parameters
        ----------
        name : string
            Field name.
        function : callable
            The function to be called to generate the field.
            This function should take two arguments, the
            arbor and the data structure containing the
            dependent fields.  See below for an example.
        units : optional, string
            The units in which the field will be returned.
        description : optional, string
            A short description of the field.
        force_add : optional, bool
            If True, add field even if it already exists and warn the
            user and raise an exception if dependencies do not exist.
            If False, silently do nothing in both instances.
            Default: True.

        Examples
        --------

        >>> import ytree
        >>> a = ytree.load("tree_0_0_0.dat")
        >>> def _redshift(arbor, data):
        ...     return 1. / data["scale"] - 1
        ...
        >>> a.add_derived_field("redshift", _redshift)
        >>> print (a["redshift"])

        """

        if name in self.field_info:
            if force_add:
                ftype = self.field_info[name].get("type", "on-disk")
                if ftype in ["alias", "derived"]:
                    fl = self.derived_field_list
                else:
                    fl = self.field_list
                mylog.warn(("Overriding field \"%s\" that already " +
                            "exists as %s field.") % (name, ftype))
                fl.pop(fl.index(name))
            else:
                return

        if units is None:
            units = ""
        fc = FakeFieldContainer(self, name=name)
        try:
            rv = function(fc)
        except ArborFieldDependencyNotFound as e:
            if force_add:
                raise e
            else:
                return
        rv.convert_to_units(units)
        self.derived_field_list.append(name)
        self.field_info[name] = \
          {"type": "derived", "function": function,
           "units": units, "description": description,
           "dependencies": list(fc.keys())}

    @classmethod
    def _is_valid(cls, *args, **kwargs):
        """
        Check if input file works with a specific Arbor class.
        This is used with :func:`~ytree.arbor.arbor.load` function.
        """
        return False

    def save_arbor(self,
                   filename="arbor",
                   fields=None,
                   trees=None,
                   max_file_size=524288):
        r"""
        Save the arbor to a file.

        The saved arbor can be re-loaded as an arbor.

        Parameters
        ----------
        filename : optional, string
            Output file keyword.  If filename ends in ".h5",
            the main header file will be just that.  If not,
            filename will be <filename>/<basename>.h5.
            Default: "arbor".
        fields : optional, list of strings
            The fields to be saved.  If not given, all
            fields will be saved.

        Returns
        -------
        header_filename : string
            The filename of the saved arbor.

        Examples
        --------

        >>> import ytree
        >>> a = ytree.load("rockstar_halos/trees/tree_0_0_0.dat")
        >>> fn = a.save_arbor()
        >>> # reload it
        >>> a2 = ytree.load(fn)

        """

        if trees is None:
            all_trees = True
            trees = self.trees
            roots = trees
        else:
            all_trees = False
            # assemble unique tree roots for getting fields
            trees = np.asarray(trees)
            roots = []
            root_uids = []
            for tree in trees:
                if tree.root == -1:
                    my_root = tree
                else:
                    my_root = tree.root
                if my_root.uid not in root_uids:
                    roots.append(my_root)
                    root_uids.append(my_root.uid)
            roots = np.array(roots)
            del root_uids

        if fields in [None, "all"]:
            # If a field has an alias, get that instead.
            fields = []
            for field in self.field_list + self.analysis_field_list:
                fields.extend(self.field_info[field].get("aliases", [field]))
        else:
            fields.extend([f for f in ["uid", "desc_uid"] if f not in fields])

        ds = {}
        for attr in ["hubble_constant", "omega_matter", "omega_lambda"]:
            if hasattr(self, attr):
                ds[attr] = getattr(self, attr)
        extra_attrs = {
            "box_size": self.box_size,
            "arbor_type": "YTreeArbor",
            "unit_registry_json": self.unit_registry.to_json()
        }

        self._node_io_loop(self._setup_tree,
                           root_nodes=roots,
                           pbar="Setting up trees")
        self._root_io.get_fields(self, fields=fields)

        # determine file layout
        nn = 0  # node count
        nt = 0  # tree count
        nnodes = []
        ntrees = []
        tree_size = np.array([tree.tree_size for tree in trees])
        for ts in tree_size:
            nn += ts
            nt += 1
            if nn > max_file_size:
                nnodes.append(nn - ts)
                ntrees.append(nt - 1)
                nn = ts
                nt = 1
        if nn > 0:
            nnodes.append(nn)
            ntrees.append(nt)
        nfiles = len(nnodes)
        nnodes = np.array(nnodes)
        ntrees = np.array(ntrees)
        tree_end_index = ntrees.cumsum()
        tree_start_index = tree_end_index - ntrees

        # write header file
        fieldnames = [field.replace("/", "_") for field in fields]
        myfi = {}
        rdata = {}
        rtypes = {}
        for field, fieldname in zip(fields, fieldnames):
            fi = self.field_info[field]
            myfi[fieldname] = \
              dict((key, fi[key])
                   for key in ["units", "description"]
                   if key in fi)
            if all_trees:
                rdata[fieldname] = self._root_field_data[field]
            else:
                rdata[fieldname] = self.arr([t[field] for t in trees])
            rtypes[fieldname] = "data"
        # all saved trees will be roots
        if not all_trees:
            rdata["desc_uid"][:] = -1
        extra_attrs["field_info"] = json.dumps(myfi)
        extra_attrs["total_files"] = nfiles
        extra_attrs["total_trees"] = trees.size
        extra_attrs["total_nodes"] = tree_size.sum()
        hdata = {
            "tree_start_index": tree_start_index,
            "tree_end_index": tree_end_index,
            "tree_size": ntrees
        }
        hdata.update(rdata)
        htypes = dict((f, "index") for f in hdata)
        htypes.update(rtypes)

        filename = _determine_output_filename(filename, ".h5")
        header_filename = "%s.h5" % filename
        save_as_dataset(ds,
                        header_filename,
                        hdata,
                        field_types=htypes,
                        extra_attrs=extra_attrs)

        # write data files
        ftypes = dict((f, "data") for f in fieldnames)
        for i in range(nfiles):
            my_nodes = trees[tree_start_index[i]:tree_end_index[i]]
            self._node_io_loop(self._node_io.get_fields,
                               pbar="Getting fields [%d/%d]" % (i + 1, nfiles),
                               root_nodes=my_nodes,
                               fields=fields,
                               root_only=False)
            fdata = dict((field, np.empty(nnodes[i])) for field in fieldnames)
            my_tree_size = tree_size[tree_start_index[i]:tree_end_index[i]]
            my_tree_end = my_tree_size.cumsum()
            my_tree_start = my_tree_end - my_tree_size
            pbar = get_pbar("Creating field arrays [%d/%d]" % (i + 1, nfiles),
                            len(fields) * nnodes[i])
            c = 0
            for field, fieldname in zip(fields, fieldnames):
                for di, node in enumerate(my_nodes):
                    if node.is_root:
                        ndata = node._tree_field_data[field]
                    else:
                        ndata = node["tree", field]
                        if field == "desc_uid":
                            # make sure it's a root when loaded
                            ndata[0] = -1
                    fdata[fieldname][my_tree_start[di]:my_tree_end[di]] = ndata
                    c += my_tree_size[di]
                    pbar.update(c)
            pbar.finish()
            fdata["tree_start_index"] = my_tree_start
            fdata["tree_end_index"] = my_tree_end
            fdata["tree_size"] = my_tree_size
            for ft in ["tree_start_index", "tree_end_index", "tree_size"]:
                ftypes[ft] = "index"
            my_filename = "%s_%04d.h5" % (filename, i)
            save_as_dataset({}, my_filename, fdata, field_types=ftypes)

        return header_filename
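A minimal usage sketch for the routine above, assuming it is ytree's Arbor.save_arbor (the input path and output name below are illustrative, not from the source): it writes a single header file plus numbered data files and returns the header filename, which can be reloaded directly.

import ytree

a = ytree.load("tree_0_0_0.dat")        # hypothetical merger-tree catalog
fn = a.save_arbor(filename="arbor")     # header file plus numbered data files (arbor_0000.h5, ...)
a2 = ytree.load(fn)                     # the returned header filename reloads the arbor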
Example #34
0
 def hubble_constant(self, value):
     self._hubble_constant = value
     # reset the unit registry lut while preserving other changes
     self.unit_registry = UnitRegistry.from_json(
         self.unit_registry.to_json())
     self.unit_registry.modify("h", self.hubble_constant)
Example #35
0
 def __init__(self, filename, hubble_constant=1.0):
     self.unit_registry = UnitRegistry()
     self.hubble_constant = hubble_constant
     super(AHFArbor, self).__init__(filename)
Example #36
0
 def __init__(self,
              simulation_ds=None,
              halos_ds=None,
              make_analytic=True,
              omega_matter0=0.2726,
              omega_lambda0=0.7274,
              omega_baryon0=0.0456,
              hubble0=0.704,
              sigma8=0.86,
              primordial_index=1.0,
              this_redshift=0,
              log_mass_min=None,
              log_mass_max=None,
              num_sigma_bins=360,
              fitting_function=4):
     self.simulation_ds = simulation_ds
     self.halos_ds = halos_ds
     self.omega_matter0 = omega_matter0
     self.omega_lambda0 = omega_lambda0
     self.omega_baryon0 = omega_baryon0
     self.hubble0 = hubble0
     self.sigma8 = sigma8
     self.primordial_index = primordial_index
     self.this_redshift = this_redshift
     self.log_mass_min = log_mass_min
     self.log_mass_max = log_mass_max
     self.num_sigma_bins = num_sigma_bins
     self.fitting_function = fitting_function
     self.make_analytic = make_analytic
     self.make_simulated = False
     """
     If we want to make an analytic mass function, grab what we can from either the 
     halo file or the data set, and make sure that the user supplied everything else
     that is needed.
     """
     # If we don't have any datasets, make the analytic function with user values
     if simulation_ds is None and halos_ds is None:
         # Set a reasonable mass min and max if none were provided
         if log_mass_min is None:
             self.log_mass_min = 5
         if log_mass_max is None:
             self.log_mass_max = 16
     # If we're making the analytic function...
     if self.make_analytic is True:
         # Try to set cosmological parameters from the simulation dataset
         if simulation_ds is not None:
             self.omega_matter0 = self.simulation_ds.omega_matter
             self.omega_lambda0 = self.simulation_ds.omega_lambda
             self.hubble0 = self.simulation_ds.hubble_constant
             self.this_redshift = self.simulation_ds.current_redshift
             # Set a reasonable mass min and max if none were provided
             if log_mass_min is None:
                 self.log_mass_min = 5
             if log_mass_max is None:
                 self.log_mass_max = 16
         # If we have a halo dataset but not a simulation dataset, use that instead
         if simulation_ds is None and halos_ds is not None:
             self.omega_matter0 = self.halos_ds.omega_matter
             self.omega_lambda0 = self.halos_ds.omega_lambda
             self.hubble0 = self.halos_ds.hubble_constant
             self.this_redshift = self.halos_ds.current_redshift
             # If the user didn't specify mass min and max, set them from the halos
             if log_mass_min is None:
                 self.set_mass_from_halos("min_mass")
             if log_mass_max is None:
                 self.set_mass_from_halos("max_mass")
         # Do the calculations.
         self.sigmaM()
         self.dndm()
         # Return the mass array in M_solar rather than M_solar/h
         self.masses_analytic = YTArray(self.masses_analytic / self.hubble0,
                                        "Msun")
         # The halo arrays will already have yt units, but the analytic forms do
         # not. If a dataset has been provided, use that to give them units. At the
         # same time, convert to comoving (Mpc)^-3
         if simulation_ds is not None:
             self.n_cumulative_analytic = simulation_ds.arr(
                 self.n_cumulative_analytic, "(Mpccm)**(-3)")
         elif halos_ds is not None:
             self.n_cumulative_analytic = halos_ds.arr(
                 self.n_cumulative_analytic, "(Mpccm)**(-3)")
         else:
             from yt.units.unit_registry import UnitRegistry
             from yt.units.dimensions import length
             hmf_registry = UnitRegistry()
             for my_unit in ["m", "pc", "AU", "au"]:
                 new_unit = "%scm" % my_unit
                 hmf_registry.add(
                     new_unit, hmf_registry.lut[my_unit][0] /
                     (1 + self.this_redshift), length,
                     "\\rm{%s}/(1+z)" % my_unit)
             self.n_cumulative_analytic = YTArray(
                 self.n_cumulative_analytic,
                 "(Mpccm)**(-3)",
                 registry=hmf_registry)
     """
     If a halo file has been supplied, make a mass function for the simulated halos.
     """
     if halos_ds is not None:
         # Used to check if a simulated halo mass function exists to write out
         self.make_simulated = True
         # Calculate the simulated halo mass function
         self.create_sim_hmf()
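A hedged usage sketch for the constructor above, assuming it belongs to yt's HaloMassFcn (the import path and parameter values are assumptions, not from the source). With neither simulation_ds nor halos_ds supplied, the analytic mass function is built entirely from the keyword cosmology and the default mass bounds.

from yt.analysis_modules.halo_mass_function.api import HaloMassFcn  # assumed path

hmf = HaloMassFcn(omega_matter0=0.27, omega_lambda0=0.73, hubble0=0.70,
                  sigma8=0.8, this_redshift=0.0,
                  log_mass_min=8, log_mass_max=15)
# hmf.masses_analytic and hmf.n_cumulative_analytic now hold the analytic
# mass function; no simulated HMF is made because halos_ds was not given.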
class Dataset(object):

    default_fluid_type = "gas"
    fluid_types = ("gas", "deposit", "index")
    particle_types = ("io",) # By default we have an 'all'
    particle_types_raw = ("io",)
    geometry = "cartesian"
    coordinates = None
    max_level = 99
    storage_filename = None
    particle_unions = None
    known_filters = None
    _index_class = None
    field_units = None
    derived_field_list = requires_index("derived_field_list")
    _instantiated = False

    def __new__(cls, filename=None, *args, **kwargs):
        from yt.frontends.stream.data_structures import StreamHandler
        if not isinstance(filename, str):
            obj = object.__new__(cls)
            # The Stream frontend uses a StreamHandler object to pass metadata
            # to __init__.
            is_stream = (hasattr(filename, 'get_fields') and
                         hasattr(filename, 'get_particle_type'))
            if not is_stream:
                obj.__init__(filename, *args, **kwargs)
            return obj
        apath = os.path.abspath(filename)
        #if not os.path.exists(apath): raise IOError(filename)
        if ytcfg.getboolean("yt","skip_dataset_cache"):
            obj = object.__new__(cls)
        elif apath not in _cached_datasets:
            obj = object.__new__(cls)
            if obj._skip_cache is False:
                _cached_datasets[apath] = obj
        else:
            obj = _cached_datasets[apath]
        return obj

    def __init__(self, filename, dataset_type=None, file_style=None, units_override=None):
        """
        Base class for generating new output types.  Principally consists of
        a *filename* and a *dataset_type* which will be passed on to children.
        """
        # We return early and do NOT initialize a second time if this file has
        # already been initialized.
        if self._instantiated: return
        self.dataset_type = dataset_type
        self.file_style = file_style
        self.conversion_factors = {}
        self.parameters = {}
        self.known_filters = self.known_filters or {}
        self.particle_unions = self.particle_unions or {}
        self.field_units = self.field_units or {}
        if units_override is None:
            units_override = {}
        self.units_override = units_override

        # path stuff
        self.parameter_filename = str(filename)
        self.basename = os.path.basename(filename)
        self.directory = os.path.expanduser(os.path.dirname(filename))
        self.fullpath = os.path.abspath(self.directory)
        self.backup_filename = self.parameter_filename + '_backup.gdf'
        self.read_from_backup = False
        if os.path.exists(self.backup_filename):
            self.read_from_backup = True
        if len(self.directory) == 0:
            self.directory = "."

        # to get the timing right, do this before the heavy lifting
        self._instantiated = time.time()

        self.min_level = 0
        self.no_cgs_equiv_length = False

        self._create_unit_registry()
        self._parse_parameter_file()
        self.set_units()
        self._setup_coordinate_handler()

        # Because we need an instantiated class to check the ds's existence in
        # the cache, we move that check to here from __new__.  This avoids
        # double-instantiation.
        try:
            _ds_store.check_ds(self)
        except NoParameterShelf:
            pass
        self.print_key_parameters()

        self._set_derived_attrs()
        self._setup_classes()

    def _set_derived_attrs(self):
        if self.domain_left_edge is None or self.domain_right_edge is None:
            self.domain_center = np.zeros(3)
            self.domain_width = np.zeros(3)
        else:
            self.domain_center = 0.5 * (self.domain_right_edge + self.domain_left_edge)
            self.domain_width = self.domain_right_edge - self.domain_left_edge
        if not isinstance(self.current_time, YTQuantity):
            self.current_time = self.quan(self.current_time, "code_time")
        for attr in ("center", "width", "left_edge", "right_edge"):
            n = "domain_%s" % attr
            v = getattr(self, n)
            v = self.arr(v, "code_length")
            setattr(self, n, v)

    def __reduce__(self):
        args = (self._hash(),)
        return (_reconstruct_ds, args)

    def __repr__(self):
        return self.basename

    def _hash(self):
        s = "%s;%s;%s" % (self.basename,
            self.current_time, self.unique_identifier)
        try:
            import hashlib
            return hashlib.md5(s.encode('utf-8')).hexdigest()
        except ImportError:
            return s.replace(";", "*")

    @property
    def _mrep(self):
        return MinimalDataset(self)

    @property
    def _skip_cache(self):
        return False

    def hub_upload(self):
        self._mrep.upload()

    @classmethod
    def _is_valid(cls, *args, **kwargs):
        return False

    def __getitem__(self, key):
        """ Returns units, parameters, or conversion_factors in that order. """
        return self.parameters[key]

    def __iter__(self):
      for i in self.parameters: yield i

    def get_smallest_appropriate_unit(self, v, quantity='distance',
                                      return_quantity=False):
        """
        Returns, as a string, the largest whole unit smaller than the
        YTQuantity passed to it.

        The quantity keyword can be equal to `distance` or `time`.  In the
        case of distance, the units are: 'Mpc', 'kpc', 'pc', 'au', 'rsun',
        'km', etc.  For time, the units are: 'Myr', 'kyr', 'yr', 'day', 'hr',
        's', 'ms', etc.

        If return_quantity is set to True, it finds the largest YTQuantity
        object with a whole unit and a power of ten as the coefficient, and it
        returns this YTQuantity.
        """
        good_u = None
        if quantity == 'distance':
            unit_list =['Ppc', 'Tpc', 'Gpc', 'Mpc', 'kpc', 'pc', 'au', 'rsun',
                        'km', 'cm', 'um', 'nm', 'pm']
        elif quantity == 'time':
            unit_list =['Yyr', 'Zyr', 'Eyr', 'Pyr', 'Tyr', 'Gyr', 'Myr', 'kyr',
                        'yr', 'day', 'hr', 's', 'ms', 'us', 'ns', 'ps', 'fs']
        else:
            raise SyntaxError("Specified quantity must be equal to 'distance'"\
                              "or 'time'.")
        for unit in unit_list:
            uq = self.quan(1.0, unit)
            if uq <= v:
                good_u = unit
                break
        if good_u is None and quantity == 'distance': good_u = 'cm'
        if good_u is None and quantity == 'time': good_u = 's'
        if return_quantity:
            unit_index = unit_list.index(good_u)
            # This avoids indexing errors
            if unit_index == 0: return self.quan(1, unit_list[0])
            # Number of orders of magnitude between unit and next one up
            OOMs = np.ceil(np.log10(self.quan(1, unit_list[unit_index-1]) /
                                    self.quan(1, unit_list[unit_index])))
            # Backwards order of coefficients (e.g. [100, 10, 1])
            coeffs = 10**np.arange(OOMs)[::-1]
            for j in coeffs:
                uq = self.quan(j, good_u)
                if uq <= v:
                    return uq
        else:
            return good_u
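    # Hedged usage sketch (assumes a loaded dataset ``ds``; values illustrative):
    #   >>> ds.get_smallest_appropriate_unit(ds.quan(3.0e5, "km"))
    #   'km'
    #   >>> ds.get_smallest_appropriate_unit(ds.quan(3.0e5, "km"),
    #   ...                                  return_quantity=True)
    #   100000.0 km
    # With return_quantity=True the coefficient is the largest power of ten
    # that keeps the quantity at or below the input value.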

    def has_key(self, key):
        """
        Checks whether the key is a known parameter. Returns a boolean.

        """
        return key in self.parameters

    _instantiated_index = None
    @property
    def index(self):
        if self._instantiated_index is None:
            if self._index_class is None:
                raise RuntimeError("You should not instantiate Dataset.")
            self._instantiated_index = self._index_class(
                self, dataset_type=self.dataset_type)
            # Now we do things that we need an instantiated index for
            # ...first off, we create our field_info now.
            oldsettings = np.geterr()
            np.seterr(all='ignore')
            self.create_field_info()
            np.seterr(**oldsettings)
        return self._instantiated_index

    _index_proxy = None
    @property
    def h(self):
        if self._index_proxy is None:
            self._index_proxy = IndexProxy(self)
        return self._index_proxy
    hierarchy = h

    @parallel_root_only
    def print_key_parameters(self):
        for a in ["current_time", "domain_dimensions", "domain_left_edge",
                  "domain_right_edge", "cosmological_simulation"]:
            if not hasattr(self, a):
                mylog.error("Missing %s in parameter file definition!", a)
                continue
            v = getattr(self, a)
            mylog.info("Parameters: %-25s = %s", a, v)
        if hasattr(self, "cosmological_simulation") and \
           getattr(self, "cosmological_simulation"):
            for a in ["current_redshift", "omega_lambda", "omega_matter",
                      "hubble_constant"]:
                if not hasattr(self, a):
                    mylog.error("Missing %s in parameter file definition!", a)
                    continue
                v = getattr(self, a)
                mylog.info("Parameters: %-25s = %s", a, v)

    @parallel_root_only
    def print_stats(self):
        self.index.print_stats()

    @property
    def field_list(self):
        return self.index.field_list

    def create_field_info(self):
        self.field_dependencies = {}
        self.derived_field_list = []
        self.filtered_particle_types = []
        self.field_info = self._field_info_class(self, self.field_list)
        self.coordinates.setup_fields(self.field_info)
        self.field_info.setup_fluid_fields()
        for ptype in self.particle_types:
            self.field_info.setup_particle_fields(ptype)
        if "all" not in self.particle_types:
            mylog.debug("Creating Particle Union 'all'")
            pu = ParticleUnion("all", list(self.particle_types_raw))
            self.add_particle_union(pu)
        self.field_info.setup_extra_union_fields()
        mylog.info("Loading field plugins.")
        self.field_info.load_all_plugins()
        deps, unloaded = self.field_info.check_derived_fields()
        self.field_dependencies.update(deps)

    def setup_deprecated_fields(self):
        from yt.fields.field_aliases import _field_name_aliases
        added = []
        for old_name, new_name in _field_name_aliases:
            try:
                fi = self._get_field_info(new_name)
            except YTFieldNotFound:
                continue
            self.field_info.alias(("gas", old_name), fi.name)
            added.append(("gas", old_name))
        self.field_info.find_dependencies(added)

    def _setup_coordinate_handler(self):
        kwargs = {}
        if isinstance(self.geometry, tuple):
            self.geometry, ordering = self.geometry
            kwargs['ordering'] = ordering
        if isinstance(self.geometry, CoordinateHandler):
            # I kind of dislike this.  The geometry field should always be a
            # string, but the way we're set up with subclassing, we can't
            # mandate that quite the way I'd like.
            self.coordinates = self.geometry
            return
        elif callable(self.geometry):
            cls = self.geometry
        elif self.geometry == "cartesian":
            cls = CartesianCoordinateHandler
        elif self.geometry == "cylindrical":
            cls = CylindricalCoordinateHandler
        elif self.geometry == "polar":
            cls = PolarCoordinateHandler
        elif self.geometry == "spherical":
            cls = SphericalCoordinateHandler
        elif self.geometry == "geographic":
            cls = GeographicCoordinateHandler
        elif self.geometry == "spectral_cube":
            cls = SpectralCubeCoordinateHandler
        else:
            raise YTGeometryNotSupported(self.geometry)
        self.coordinates = cls(self, **kwargs)

    def add_particle_union(self, union):
        # No string lookups here, we need an actual union.
        f = self.particle_fields_by_type
        fields = set_intersection([f[s] for s in union
                                   if s in self.particle_types_raw
                                   and len(f[s]) > 0])
        for field in fields:
            units = set([])
            for s in union:
                # First we check our existing fields for units
                funits = self._get_field_info(s, field).units
                # Then we override with field_units settings.
                funits = self.field_units.get((s, field), funits)
                units.add(funits)
            if len(units) == 1:
                self.field_units[union.name, field] = list(units)[0]
        self.particle_types += (union.name,)
        self.particle_unions[union.name] = union
        fields = [ (union.name, field) for field in fields]
        self.field_list.extend(fields)
        # Give ourselves a chance to add them here, first, then...
        # ...if we can't find them, we set them up as defaults.
        new_fields = self._setup_particle_types([union.name])
        rv = self.field_info.find_dependencies(new_fields)

    def add_particle_filter(self, filter):
        # This requires an index
        self.index
        # This is a dummy, which we set up to enable passthrough of "all"
        # concatenation fields.
        n = getattr(filter, "name", filter)
        self.known_filters[n] = None
        if isinstance(filter, str):
            used = False
            for f in filter_registry[filter]:
                used = self._setup_filtered_type(f)
                if used:
                    filter = f
                    break
        else:
            used = self._setup_filtered_type(filter)
        if not used:
            self.known_filters.pop(n, None)
            return False
        self.known_filters[filter.name] = filter
        return True

    def _setup_filtered_type(self, filter):
        if not filter.available(self.derived_field_list):
            return False
        fi = self.field_info
        fd = self.field_dependencies
        available = False
        for fn in self.derived_field_list:
            if fn[0] == filter.filtered_type:
                # Now we can add this
                available = True
                self.derived_field_list.append(
                    (filter.name, fn[1]))
                fi[filter.name, fn[1]] = filter.wrap_func(fn, fi[fn])
                # Now we append the dependencies
                fd[filter.name, fn[1]] = fd[fn]
        if available:
            self.particle_types += (filter.name,)
            self.filtered_particle_types.append(filter.name)
            new_fields = self._setup_particle_types([filter.name])
            deps, _ = self.field_info.check_derived_fields(new_fields)
            self.field_dependencies.update(deps)
        return available

    def _setup_particle_types(self, ptypes = None):
        df = []
        if ptypes is None: ptypes = self.particle_types_raw
        for ptype in set(ptypes):
            df += self._setup_particle_type(ptype)
        return df

    _last_freq = (None, None)
    _last_finfo = None
    def _get_field_info(self, ftype, fname = None):
        self.index
        if fname is None:
            ftype, fname = "unknown", ftype
        guessing_type = False
        if ftype == "unknown":
            guessing_type = True
            ftype = self._last_freq[0] or ftype
        field = (ftype, fname)
        if field == self._last_freq:
            return self._last_finfo
        if field in self.field_info:
            self._last_freq = field
            self._last_finfo = self.field_info[(ftype, fname)]
            return self._last_finfo

        if fname in self.field_info:
            # Sometimes, if guessing_type == True, this will be switched for
            # the type of field it is.  So we look at the field type and
            # determine if we need to change the type.
            fi = self._last_finfo = self.field_info[fname]
            if fi.particle_type and self._last_freq[0] \
                not in self.particle_types:
                    field = "all", field[1]
            elif not fi.particle_type and self._last_freq[0] \
                not in self.fluid_types:
                    field = self.default_fluid_type, field[1]
            self._last_freq = field
            return self._last_finfo
        # We also should check "all" for particles, which can show up if you're
        # mixing deposition/gas fields with particle fields.

        if guessing_type:
            to_guess = ["all", self.default_fluid_type] \
                     + list(self.fluid_types) \
                     + list(self.particle_types)
            for ftype in to_guess:
                if (ftype, fname) in self.field_info:
                    self._last_freq = (ftype, fname)
                    self._last_finfo = self.field_info[(ftype, fname)]
                    return self._last_finfo
        raise YTFieldNotFound((ftype, fname), self)

    def _setup_classes(self):
        # Called by subclass
        self.object_types = []
        self.objects = []
        self.plots = []
        for name, cls in sorted(data_object_registry.items()):
            if name in self._index_class._unsupported_objects:
                setattr(self, name,
                    _unsupported_object(self, name))
                continue
            cname = cls.__name__
            if cname.endswith("Base"): cname = cname[:-4]
            self._add_object_class(name, cname, cls, {'ds':weakref.proxy(self)})
        if self.refine_by != 2 and hasattr(self, 'proj') and \
            hasattr(self, 'overlap_proj'):
            mylog.warning("Refine by something other than two: reverting to"
                        + " overlap_proj")
            self.proj = self.overlap_proj
        if self.dimensionality < 3 and hasattr(self, 'proj') and \
            hasattr(self, 'overlap_proj'):
            mylog.warning("Dimensionality less than 3: reverting to"
                        + " overlap_proj")
            self.proj = self.overlap_proj
        self.object_types.sort()

    def _add_object_class(self, name, class_name, base, dd):
        self.object_types.append(name)
        dd.update({'__doc__': base.__doc__})
        obj = type(class_name, (base,), dd)
        setattr(self, name, obj)

    def find_max(self, field):
        """
        Returns (value, location) of the maximum of a given field.
        """
        mylog.debug("Searching for maximum value of %s", field)
        source = self.all_data()
        max_val, maxi, mx, my, mz = \
            source.quantities.max_location(field)
        mylog.info("Max Value is %0.5e at %0.16f %0.16f %0.16f",
              max_val, mx, my, mz)
        return max_val, self.arr([mx, my, mz], 'code_length', dtype="float64")

    def find_min(self, field):
        """
        Returns (value, location) for the minimum of a given field.
        """
        mylog.debug("Searching for minimum value of %s", field)
        source = self.all_data()
        min_val, maxi, mx, my, mz = \
            source.quantities.min_location(field)
        mylog.info("Min Value is %0.5e at %0.16f %0.16f %0.16f",
              min_val, mx, my, mz)
        return min_val, self.arr([mx, my, mz], 'code_length', dtype="float64")

    def find_field_values_at_point(self, fields, coords):
        """
        Returns the values [field1, field2,...] of the fields at the given
        coordinates. Returns a list of field values in the same order as
        the input *fields*.
        """
        return self.point(coords)[fields]

    def find_field_values_at_points(self, fields, coords):
        """
        Returns the values [field1, field2,...] of the fields at the given
        [(x1, y1, z2), (x2, y2, z2),...] points.  Returns a list of field
        values in the same order as the input *fields*.

        This is quite slow right now as it creates a new data object for each
        point.  If an optimized version exists on the Index object we'll use
        that instead.
        """
        if hasattr(self,"index") and \
                hasattr(self.index,"_find_field_values_at_points"):
            return self.index._find_field_values_at_points(fields,coords)

        fields = ensure_list(fields)
        out = np.zeros((len(fields), len(coords)), dtype=np.float64)
        for i, coord in enumerate(coords):
            # fill one column (all fields) per sampled point
            out[:, i] = self.point(coord)[fields]
        return out

    # Now all the object related stuff
    def all_data(self, find_max=False, **kwargs):
        """
        all_data is a wrapper to the Region object for creating a region
        which covers the entire simulation domain.
        """
        if find_max: c = self.find_max("density")[1]
        else: c = (self.domain_right_edge + self.domain_left_edge)/2.0
        return self.region(c,
            self.domain_left_edge, self.domain_right_edge, **kwargs)

    def box(self, left_edge, right_edge, **kwargs):
        """
        box is a wrapper to the Region object for creating a region
        without having to specify a *center* value.  It assumes the center
        is the midpoint between the left_edge and right_edge.
        """
        left_edge = np.array(left_edge)
        right_edge = np.array(right_edge)
        c = (left_edge + right_edge)/2.0
        return self.region(c, left_edge, right_edge, **kwargs)

    def _setup_particle_type(self, ptype):
        orig = set(self.field_info.items())
        self.field_info.setup_particle_fields(ptype)
        return [n for n, v in set(self.field_info.items()).difference(orig)]

    @property
    def particle_fields_by_type(self):
        fields = defaultdict(list)
        for field in self.field_list:
            if field[0] in self.particle_types_raw:
                fields[field[0]].append(field[1])
        return fields

    @property
    def ires_factor(self):
        o2 = np.log2(self.refine_by)
        if o2 != int(o2):
            raise RuntimeError
        return int(o2)

    def relative_refinement(self, l0, l1):
        return self.refine_by**(l1-l0)

    def _create_unit_registry(self):
        self.unit_registry = UnitRegistry()
        import yt.units.dimensions as dimensions
        self.unit_registry.add("code_length", 1.0, dimensions.length)
        self.unit_registry.add("code_mass", 1.0, dimensions.mass)
        self.unit_registry.add("code_density", 1.0, dimensions.density)
        self.unit_registry.add("code_time", 1.0, dimensions.time)
        self.unit_registry.add("code_magnetic", 1.0, dimensions.magnetic_field)
        self.unit_registry.add("code_temperature", 1.0, dimensions.temperature)
        self.unit_registry.add("code_pressure", 1.0, dimensions.pressure)
        self.unit_registry.add("code_velocity", 1.0, dimensions.velocity)
        self.unit_registry.add("code_metallicity", 1.0,
                               dimensions.dimensionless)
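    # The "code_*" symbols above are registered as placeholders equal to 1.0
    # in cgs; set_code_units() below swaps in the dataset's real conversions
    # via unit_registry.modify().  Hedged sketch (values illustrative):
    #   >>> ds.unit_registry.modify("code_length", ds.quan(3.086e21, "cm"))
    #   >>> ds.quan(1.0, "code_length").in_units("kpc")   # ~1 kpc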

    def set_units(self):
        """
        Sets up comoving units (for cosmological datasets) and the code
        units for this dataset.

        """
        from yt.units.dimensions import length
        if hasattr(self, "cosmological_simulation") \
           and getattr(self, "cosmological_simulation"):
            # this dataset is cosmological, so add cosmological units.
            self.unit_registry.modify("h", self.hubble_constant)
            # Comoving lengths
            for my_unit in ["m", "pc", "AU", "au"]:
                new_unit = "%scm" % my_unit
                self.unit_registry.add(new_unit, self.unit_registry.lut[my_unit][0] /
                                       (1 + self.current_redshift),
                                       length, "\\rm{%s}/(1+z)" % my_unit)

        self.set_code_units()

        if hasattr(self, "cosmological_simulation") \
           and getattr(self, "cosmological_simulation"):
            # this dataset is cosmological, add a cosmology object
            setattr(self, "cosmology",
                    Cosmology(hubble_constant=self.hubble_constant,
                              omega_matter=self.omega_matter,
                              omega_lambda=self.omega_lambda,
                              unit_registry=self.unit_registry))
            setattr(self, "critical_density",
                    self.cosmology.critical_density(self.current_redshift))

    def get_unit_from_registry(self, unit_str):
        """
        Creates a unit object matching the string expression, using this
        dataset's unit registry.

        Parameters
        ----------
        unit_str : str
            string that we can parse for a sympy Expr.

        """
        new_unit = Unit(unit_str, registry=self.unit_registry)
        return new_unit

    def set_code_units(self):
        self._set_code_unit_attributes()
        # here we override units, if overrides have been provided.
        self._override_code_units()
        self.unit_registry.modify("code_length", self.length_unit)
        self.unit_registry.modify("code_mass", self.mass_unit)
        self.unit_registry.modify("code_time", self.time_unit)
        if hasattr(self, 'magnetic_unit'):
            # If this is not set but some fields are expressed in
            # "code_magnetic", this allows them to remain in that unit.
            self.unit_registry.modify("code_magnetic", self.magnetic_unit)
        vel_unit = getattr(
            self, "velocity_unit", self.length_unit / self.time_unit)
        pressure_unit = getattr(
            self, "pressure_unit",
            self.mass_unit / (self.length_unit * self.time_unit)**2)
        temperature_unit = getattr(self, "temperature_unit", 1.0)
        density_unit = getattr(self, "density_unit", self.mass_unit / self.length_unit**3)
        self.unit_registry.modify("code_velocity", vel_unit)
        self.unit_registry.modify("code_temperature", temperature_unit)
        self.unit_registry.modify("code_pressure", pressure_unit)
        self.unit_registry.modify("code_density", density_unit)
        # domain_width does not yet exist
        if (self.domain_left_edge is not None and
            self.domain_right_edge is not None):
            DW = self.arr(self.domain_right_edge - self.domain_left_edge, "code_length")
            self.unit_registry.add("unitary", float(DW.max() * DW.units.base_value),
                                   DW.units.dimensions)

    def _override_code_units(self):
        if len(self.units_override) == 0:
            return
        mylog.warning("Overriding code units. This is an experimental and potentially "+
                      "dangerous option that may yield inconsistent results, and must be used "+
                      "very carefully, and only if you know what you want from it.")
        for unit, cgs in [("length", "cm"), ("time", "s"), ("mass", "g"),
                          ("velocity","cm/s"), ("magnetic","gauss"), ("temperature","K")]:
            val = self.units_override.get("%s_unit" % unit, None)
            if val is not None:
                if isinstance(val, YTQuantity):
                    val = (val.v, str(val.units))
                elif not isinstance(val, tuple):
                    val = (val, cgs)
                u = getattr(self, "%s_unit" % unit)
                mylog.info("Overriding %s_unit: %g %s -> %g %s.", unit, u.v, u.units, val[0], val[1])
                setattr(self, "%s_unit" % unit, self.quan(val[0], val[1]))

    _arr = None
    @property
    def arr(self):
        """Converts an array into a :class:`yt.units.yt_array.YTArray`

        The returned YTArray will be dimensionless by default, but can be
        cast to arbitrary units using the ``input_units`` keyword argument.

        Parameters
        ----------

        input_array : iterable
            A tuple, list, or array to attach units to
        input_units : String unit specification, unit symbol or astropy object
            The units of the array. Powers must be specified using python syntax
            (cm**3, not cm^3).
        dtype : string or NumPy dtype object
            The dtype of the returned array data

        Examples
        --------

        >>> import yt
        >>> import numpy as np
        >>> ds = yt.load('IsolatedGalaxy/galaxy0030/galaxy0030')
        >>> a = ds.arr([1, 2, 3], 'cm')
        >>> b = ds.arr([4, 5, 6], 'm')
        >>> a + b
        YTArray([ 401.,  502.,  603.]) cm
        >>> b + a
        YTArray([ 4.01,  5.02,  6.03]) m

        Arrays returned by this function know about the dataset's unit system

        >>> a = ds.arr(np.ones(5), 'code_length')
        >>> a.in_units('Mpccm/h')
        YTArray([ 1.00010449,  1.00010449,  1.00010449,  1.00010449,
                 1.00010449]) Mpccm/h

        """

        if self._arr is not None:
            return self._arr
        self._arr = functools.partial(YTArray, registry = self.unit_registry)
        return self._arr

    _quan = None
    @property
    def quan(self):
        """Converts an scalar into a :class:`yt.units.yt_array.YTQuantity`

        The returned YTQuantity will be dimensionless by default, but can be
        cast to arbitray units using the ``input_units`` keyword argument.

        Parameters
        ----------

        input_scalar : an integer or floating point scalar
            The scalar to attach units to
        input_units : String unit specification, unit symbol or astropy object
            The units of the quantity. Powers must be specified using python
            syntax (cm**3, not cm^3).
        dtype : string or NumPy dtype object
            The dtype of the array data.

        Examples
        --------

        >>> import yt
        >>> ds = yt.load('IsolatedGalaxy/galaxy0030/galaxy0030')

        >>> a = ds.quan(1, 'cm')
        >>> b = ds.quan(2, 'm')
        >>> a + b
        201.0 cm
        >>> b + a
        2.01 m

        Quantities created this way automatically know about the unit system
        of the dataset.

        >>> a = ds.quan(5, 'code_length')
        >>> a.in_cgs()
        1.543e+25 cm

        """

        if self._quan is not None:
            return self._quan
        self._quan = functools.partial(YTQuantity, registry=self.unit_registry)
        return self._quan

    def add_field(self, name, function=None, **kwargs):
        """
        Dataset-specific call to add_field

        Add a new field, along with supplemental metadata, to the list of
        available fields.  This respects a number of arguments, all of which
        are passed on to the constructor for
        :class:`~yt.data_objects.api.DerivedField`.

        Parameters
        ----------

        name : str
           is the name of the field.
        function : callable
           A function handle that defines the field.  Should accept
           arguments (field, data)
        units : str
           A plain text string encoding the unit.  Powers must be in
           python syntax (** instead of ^).
        take_log : bool
           Describes whether the field should be logged
        validators : list
           A list of :class:`FieldValidator` objects
        particle_type : bool
           Is this a particle (1D) field?
        vector_field : bool
           Describes the dimensionality of the field.  Currently unused.
        display_name : str
           A name used in the plots

        """
        self.index
        override = kwargs.get("force_override", False)
        # Handle the case where the field has already been added.
        if not override and name in self.field_info:
            mylog.warning("Field %s already exists. To override use " +
                          "force_override=True.", name)
        self.field_info.add_field(name, function=function, **kwargs)
        self.field_info._show_field_errors.append(name)
        deps, _ = self.field_info.check_derived_fields([name])
        self.field_dependencies.update(deps)

    def add_deposited_particle_field(self, deposit_field, method):
        """Add a new deposited particle field

        Creates a new deposited field based on the particle *deposit_field*.

        Parameters
        ----------

        deposit_field : tuple
           The field name tuple of the particle field the deposited field will
           be created from.  This must be a field name tuple so yt can
           appropriately infer the correct particle type.
        method : one of 'count', 'sum', or 'cic'
           The particle deposition method to use.

        Returns
        -------

        The field name tuple for the newly created field.
        """
        self.index
        if isinstance(deposit_field, tuple):
            ptype, deposit_field = deposit_field[0], deposit_field[1]
        else:
            raise RuntimeError
        units = self.field_info[ptype, deposit_field].units

        def _deposit_field(field, data):
            """
            Create a grid field for particle quantities weighted by particle
            mass, using the requested deposition method.
            """
            pos = data[ptype, "particle_position"]
            # get back into density
            if method != 'count':
                pden = data[ptype, "particle_mass"]
                top = data.deposit(pos, [data[(ptype, deposit_field)]*pden],
                                   method=method)
                bottom = data.deposit(pos, [pden], method=method)
                top[bottom == 0] = 0.0
                bnz = bottom.nonzero()
                top[bnz] /= bottom[bnz]
                d = data.ds.arr(top, input_units=units)
            else:
                d = data.ds.arr(data.deposit(pos, [data[ptype, deposit_field]],
                                             method=method))
            return d
        name_map = {"cic": "cic", "sum": "nn", "count": "count"}
        field_name = "%s_" + name_map[method] + "_%s"
        field_name = field_name % (ptype, deposit_field.replace('particle_', ''))
        self.add_field(
            ("deposit", field_name),
            function=_deposit_field,
            units=units,
            take_log=False,
            validators=[ValidateSpatial()])
        return ("deposit", field_name)

    def add_gradient_fields(self, input_field):
        """Add gradient fields.

        Creates four new grid-based fields that represent the components of
        the gradient of an existing field, plus an extra field for the magnitude
        of the gradient. Currently only supported in Cartesian geometries. The
        gradient is computed using second-order centered differences.

        Parameters
        ----------
        input_field : tuple
           The field name tuple of the particle field the deposited field will
           be created from.  This must be a field name tuple so yt can
           appropriately infer the correct field type.

        Returns
        -------
        A list of field name tuples for the newly created fields.

        Examples
        --------
        >>> grad_fields = ds.add_gradient_fields(("gas","temperature"))
        >>> print(grad_fields)
        [('gas', 'temperature_gradient_x'),
         ('gas', 'temperature_gradient_y'),
         ('gas', 'temperature_gradient_z'),
         ('gas', 'temperature_gradient_magnitude')]
        """
        self.index
        if isinstance(input_field, tuple):
            ftype, input_field = input_field[0], input_field[1]
        else:
            raise RuntimeError
        units = self.field_info[ftype, input_field].units
        setup_gradient_fields(self.field_info, (ftype, input_field), units)
        # Now we make a list of the fields that were just made, to check them
        # and to return them
        grad_fields = [(ftype,input_field+"_gradient_%s" % suffix)
                       for suffix in "xyz"]
        grad_fields.append((ftype,input_field+"_gradient_magnitude"))
        deps, _ = self.field_info.check_derived_fields(grad_fields)
        self.field_dependencies.update(deps)
        return grad_fields
class GadgetSimulation(SimulationTimeSeries):
    r"""
    Initialize a Gadget Simulation object.

    Upon creation, the parameter file is parsed and the time and redshift
    are calculated and stored in all_outputs.  A time units dictionary is
    instantiated to allow for time outputs to be requested with physical
    time units.  The get_time_series can be used to generate a
    DatasetSeries object.

    parameter_filename : str
        The simulation parameter file.
    find_outputs : bool
        If True, the OutputDir directory is searched for datasets.  
        Time and redshift information are gathered by temporarily 
        instantiating each dataset.  This can be used when simulation 
        data was created in a non-standard way, making it difficult 
        to guess the corresponding time and redshift information.
        Default: False.

    Examples
    --------
    >>> import yt
    >>> gs = yt.simulation("my_simulation.par", "Gadget")
    >>> gs.get_time_series()
    >>> for ds in gs:
    ...     print(ds.current_time)

    """

    def __init__(self, parameter_filename, find_outputs=False):
        self.simulation_type = "particle"
        self.dimensionality = 3
        SimulationTimeSeries.__init__(self, parameter_filename,
                                      find_outputs=find_outputs)

    def _set_units(self):
        self.unit_registry = UnitRegistry()
        self.time_unit = self.quan(1.0, "s")
        if self.cosmological_simulation:
            # Instantiate Cosmology object for units and time conversions.
            self.cosmology = \
              Cosmology(hubble_constant=self.hubble_constant,
                        omega_matter=self.omega_matter,
                        omega_lambda=self.omega_lambda,
                        unit_registry=self.unit_registry)
            self.unit_registry.modify("h", self.hubble_constant)
            # Comoving lengths
            for my_unit in ["m", "pc", "AU", "au"]:
                new_unit = "%scm" % my_unit
                # technically not true, but should be ok
                self.unit_registry.add(
                    new_unit, self.unit_registry.lut[my_unit][0],
                    dimensions.length, "\\rm{%s}/(1+z)" % my_unit)
            self.length_unit = self.quan(self.unit_base["UnitLength_in_cm"],
                                         "cmcm / h", registry=self.unit_registry)
            self.box_size *= self.length_unit.in_units("Mpccm / h")
        else:
            # Read time from file for non-cosmological sim
            self.time_unit = self.quan(
                self.unit_base["UnitLength_in_cm"]/ \
                    self.unit_base["UnitVelocity_in_cm_per_s"], "s")
            self.unit_registry.add("code_time", 1.0, dimensions.time)
            self.unit_registry.modify("code_time", self.time_unit)
            # Length
            self.length_unit = self.quan(
                self.unit_base["UnitLength_in_cm"],"cm")
            self.unit_registry.add("code_length", 1.0, dimensions.length)
            self.unit_registry.modify("code_length", self.length_unit)

    def get_time_series(self, initial_time=None, final_time=None,
                        initial_redshift=None, final_redshift=None,
                        times=None, redshifts=None, tolerance=None,
                        parallel=True, setup_function=None):

        """
        Instantiate a DatasetSeries object for a set of outputs.

        If no additional keywords given, a DatasetSeries object will be
        created with all potential datasets created by the simulation.

        Outputs can be gathered by specifying a time or redshift range
        (or a combination of time and redshift), by providing a specific
        list of times or redshifts, or by simply searching all
        subdirectories within the simulation directory.

        initial_time : tuple of type (float, str)
            The earliest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (5.0, "Gyr").  If None, the initial time of the 
            simulation is used.  This can be used in combination with 
            either final_time or final_redshift.
            Default: None.
        final_time : tuple of type (float, str)
            The latest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (13.7, "Gyr"). If None, the final time of the 
            simulation is used.  This can be used in combination with either 
            initial_time or initial_redshift.
            Default: None.
        times : tuple of type (float array, str)
            A list of times for which outputs will be found and the units 
            of those values.  For example, ([0, 1, 2, 3], "s").
            Default: None.
        initial_redshift : float
            The earliest redshift for outputs to be included.  If None,
            the initial redshift of the simulation is used.  This can be
            used in combination with either final_time or
            final_redshift.
            Default: None.
        final_redshift : float
            The latest redshift for outputs to be included.  If None,
            the final redshift of the simulation is used.  This can be
            used in combination with either initial_time or
            initial_redshift.
            Default: None.
        redshifts : array_like
            A list of redshifts for which outputs will be found.
            Default: None.
        tolerance : float
            Used in combination with "times" or "redshifts" keywords,
            this is the tolerance within which outputs are accepted
            given the requested times or redshifts.  If None, the
            nearest output is always taken.
            Default: None.
        parallel : bool/int
            If True, the generated DatasetSeries will divide the work
            such that a single processor works on each dataset.  If an
            integer is supplied, the work will be divided into that
            number of jobs.
            Default: True.
        setup_function : callable, accepts a ds
            This function will be called whenever a dataset is loaded.

        Examples
        --------

        >>> import yt
        >>> gs = yt.simulation("my_simulation.par", "Gadget")
        
        >>> gs.get_time_series(initial_redshift=10, final_time=(13.7, "Gyr"))

        >>> gs.get_time_series(redshifts=[3, 2, 1, 0])

        >>> # after calling get_time_series
        >>> for ds in gs.piter():
        ...     p = ProjectionPlot(ds, "x", "density")
        ...     p.save()

        >>> # An example using the setup_function keyword
        >>> def print_time(ds):
        ...     print(ds.current_time)
        >>> gs.get_time_series(setup_function=print_time)
        >>> for ds in gs:
        ...     SlicePlot(ds, "x", "Density").save()

        """

        if (initial_redshift is not None or \
            final_redshift is not None) and \
            not self.cosmological_simulation:
            raise InvalidSimulationTimeSeries(
                "An initial or final redshift has been given for a " +
                "noncosmological simulation.")

        my_all_outputs = self.all_outputs
        if not my_all_outputs:
            DatasetSeries.__init__(self, outputs=[], parallel=parallel,
                                   unit_base=self.unit_base)
            mylog.info("0 outputs loaded into time series.")
            return

        # Apply selection criteria to the set.
        if times is not None:
            my_outputs = self._get_outputs_by_key("time", times,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        elif redshifts is not None:
            my_outputs = self._get_outputs_by_key("redshift",
                                                  redshifts, tolerance=tolerance,
                                                  outputs=my_all_outputs)

        else:
            if initial_time is not None:
                if isinstance(initial_time, float):
                    initial_time = self.quan(initial_time, "code_time")
                elif isinstance(initial_time, tuple) and len(initial_time) == 2:
                    initial_time = self.quan(*initial_time)
                elif not isinstance(initial_time, YTArray):
                    raise RuntimeError(
                        "Error: initial_time must be given as a float or " +
                        "tuple of (value, units).")
                my_initial_time = initial_time.in_units("s")
            elif initial_redshift is not None:
                my_initial_time = self.cosmology.t_from_z(initial_redshift)
            else:
                my_initial_time = self.initial_time

            if final_time is not None:
                if isinstance(final_time, float):
                    final_time = self.quan(final_time, "code_time")
                elif isinstance(final_time, tuple) and len(final_time) == 2:
                    final_time = self.quan(*final_time)
                elif not isinstance(final_time, YTArray):
                    raise RuntimeError(
                        "Error: final_time must be given as a float or " +
                        "tuple of (value, units).")
                my_final_time = final_time.in_units("s")
            elif final_redshift is not None:
                my_final_time = self.cosmology.t_from_z(final_redshift)
            else:
                my_final_time = self.final_time

            my_initial_time.convert_to_units("s")
            my_final_time.convert_to_units("s")
            my_times = np.array([a["time"] for a in my_all_outputs])
            my_indices = np.digitize([my_initial_time, my_final_time], my_times)
            if my_initial_time == my_times[my_indices[0] - 1]: my_indices[0] -= 1
            my_outputs = my_all_outputs[my_indices[0]:my_indices[1]]

        init_outputs = []
        for output in my_outputs:
            if os.path.exists(output["filename"]):
                init_outputs.append(output["filename"])
        if len(init_outputs) == 0 and len(my_outputs) > 0:
            mylog.warn("Could not find any datasets.  " +
                       "Check the value of OutputDir in your parameter file.")
            
        DatasetSeries.__init__(self, outputs=init_outputs, parallel=parallel,
                                setup_function=setup_function,
                                unit_base=self.unit_base)
        mylog.info("%d outputs loaded into time series.", len(init_outputs))

    def _parse_parameter_file(self):
        """
        Parses the parameter file and establishes the various
        dictionaries.
        """

        self.unit_base = {}

        # Let's read the file
        lines = open(self.parameter_filename).readlines()
        comments = ["%", ";"]
        for line in (l.strip() for l in lines):
            for comment in comments:
                if comment in line: line = line[0:line.find(comment)]
            if len(line) < 2: continue
            param, vals = (i.strip() for i in line.split(None, 1))
            # First we try to decipher what type of value it is.
            vals = vals.split()
            # Special case approaching.
            if "(do" in vals: vals = vals[:1]
            if len(vals) == 0:
                pcast = str # Assume NULL output
            else:
                v = vals[0]
                # Figure out if it's castable to floating point:
                try:
                    float(v)
                except ValueError:
                    pcast = str
                else:
                    if any("." in v or "e" in v for v in vals):
                        pcast = float
                    elif v == "inf":
                        pcast = str
                    else:
                        pcast = int
            # Now we figure out what to do with it.
            if param.startswith("Unit"):
                self.unit_base[param] = float(vals[0])
            if len(vals) == 0:
                vals = ""
            elif len(vals) == 1:
                vals = pcast(vals[0])
            else:
                vals = np.array([pcast(i) for i in vals])

            self.parameters[param] = vals

        if self.parameters["ComovingIntegrationOn"]:
            cosmo_attr = {"box_size": "BoxSize",
                          "omega_lambda": "OmegaLambda",
                          "omega_matter": "Omega0",
                          "hubble_constant": "HubbleParam"}
            self.initial_redshift = 1.0 / self.parameters["TimeBegin"] - 1.0
            self.final_redshift = 1.0 / self.parameters["TimeMax"] - 1.0
            self.cosmological_simulation = 1
            for a, v in cosmo_attr.items():
                if not v in self.parameters:
                    raise MissingParameter(self.parameter_filename, v)
                setattr(self, a, self.parameters[v])
        else:
            self.cosmological_simulation = 0
            self.omega_lambda = self.omega_matter = \
                self.hubble_constant = 0.0
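    # Hedged parsing example: a Gadget parameter line such as
    #   UnitLength_in_cm   3.085678e21   % 1 kpc
    # has its "%"-comment stripped, sets
    # self.unit_base["UnitLength_in_cm"] = 3.085678e21, and stores the same
    # float in self.parameters["UnitLength_in_cm"].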

    def _snapshot_format(self, index=None):
        """
        The snapshot filename for a given index.  Modify this for different 
        naming conventions.
        """

        if self.parameters["OutputDir"].startswith("/"):
            data_dir = self.parameters["OutputDir"]
        else:
            data_dir = os.path.join(self.directory,
                                    self.parameters["OutputDir"])
        if self.parameters["NumFilesPerSnapshot"] > 1:
            suffix = ".0"
        else:
            suffix = ""
        if self.parameters["SnapFormat"] == 3:
            suffix += ".hdf5"
        if index is None:
            count = "*"
        else:
            count = "%03d" % index
        filename = "%s_%s%s" % (self.parameters["SnapshotFileBase"],
                                count, suffix)
        return os.path.join(data_dir, filename)
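    # Hedged example: with SnapshotFileBase="snapshot", NumFilesPerSnapshot=1,
    # SnapFormat=3 and index=5, this returns
    # os.path.join(data_dir, "snapshot_005.hdf5").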
                
    def _get_all_outputs(self, find_outputs=False):
        """
        Get all potential datasets and combine into a time-sorted list.
        """

        # Create the set of outputs from which further selection will be done.
        if find_outputs:
            self._find_outputs()
        else:
            if self.parameters["OutputListOn"]:
                a_values = [float(a) for a in
                            open(self.parameters["OutputListFilename"], "r").readlines()]
            else:
                a_values = [float(self.parameters["TimeOfFirstSnapshot"])]
                time_max = float(self.parameters["TimeMax"])
                while a_values[-1] < time_max:
                    if self.cosmological_simulation:
                        a_values.append(
                            a_values[-1] * self.parameters["TimeBetSnapshot"])
                    else:
                        a_values.append(
                            a_values[-1] + self.parameters["TimeBetSnapshot"])
                if a_values[-1] > time_max:
                    a_values[-1] = time_max

            if self.cosmological_simulation:
                self.all_outputs = \
                  [{"filename": self._snapshot_format(i),
                    "redshift": (1. / a - 1)}
                   for i, a in enumerate(a_values)]
                
                # Calculate times for redshift outputs.
                for output in self.all_outputs:
                    output["time"] = self.cosmology.t_from_z(output["redshift"])
            else:
                self.all_outputs = \
                  [{"filename": self._snapshot_format(i),
                    "time": self.quan(a, "code_time")}
                   for i, a in enumerate(a_values)]

            self.all_outputs.sort(key=lambda obj: obj["time"].to_ndarray())
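
    # Illustrative example (hypothetical parameter values TimeOfFirstSnapshot=0.1,
    # TimeBetSnapshot=2.0, TimeMax=0.4 for a cosmological run): a_values becomes
    # [0.1, 0.2, 0.4], and each entry of self.all_outputs has the form
    #   {"filename": ".../snapshot_000*", "redshift": 9.0, "time": <time quantity>}
    # with the list sorted by time.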

    def _calculate_simulation_bounds(self):
        """
        Figure out the starting and stopping time and redshift for the simulation.
        """

        # Convert initial/final redshifts to times.
        if self.cosmological_simulation:
            self.initial_time = self.cosmology.t_from_z(self.initial_redshift)
            self.initial_time.units.registry = self.unit_registry
            self.final_time = self.cosmology.t_from_z(self.final_redshift)
            self.final_time.units.registry = self.unit_registry

        # If not a cosmology simulation, figure out the stopping criteria.
        else:
            if "TimeBegin" in self.parameters:
                self.initial_time = self.quan(self.parameters["TimeBegin"], "code_time")
            else:
                self.initial_time = self.quan(0., "code_time")

            if "TimeMax" in self.parameters:
                self.final_time = self.quan(self.parameters["TimeMax"], "code_time")
            else:
                self.final_time = None
            if not "TimeMax" in self.parameters:
                raise NoStoppingCondition(self.parameter_filename)
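
    # Illustrative note: for a non-cosmological run with hypothetical parameters
    # TimeBegin=0.0 and TimeMax=5.0, this gives initial_time = 0.0 code_time and
    # final_time = 5.0 code_time; a cosmological run instead converts
    # initial_redshift and final_redshift to times through the Cosmology object.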

    def _find_outputs(self):
        """
        Search for directories matching the data dump keywords.
        If found, get dataset times py opening the ds.
        """

        potential_outputs = glob.glob(self._snapshot_format())
        self.all_outputs = self._check_for_outputs(potential_outputs)
        self.all_outputs.sort(key=lambda obj: obj["time"])
        only_on_root(mylog.info, "Located %d total outputs.", len(self.all_outputs))

        # manually set final time and redshift with last output
        if self.all_outputs:
            self.final_time = self.all_outputs[-1]["time"]
            if self.cosmological_simulation:
                self.final_redshift = self.all_outputs[-1]["redshift"]

    def _check_for_outputs(self, potential_outputs):
        r"""
        Check a list of files to see if they are valid datasets.
        """

        only_on_root(mylog.info, "Checking %d potential outputs.", 
                     len(potential_outputs))

        my_outputs = {}
        for my_storage, output in parallel_objects(potential_outputs, 
                                                   storage=my_outputs):
            if os.path.exists(output):
                try:
                    ds = load(output)
                    if ds is not None:
                        my_storage.result = {"filename": output,
                                             "time": ds.current_time.in_units("s")}
                        if ds.cosmological_simulation:
                            my_storage.result["redshift"] = ds.current_redshift
                except YTOutputNotIdentified:
                    mylog.error("Failed to load %s", output)
        my_outputs = [my_output for my_output in my_outputs.values()
                      if my_output is not None]
        return my_outputs
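
    # Each entry returned above is a dict of the form
    # {"filename": <path>, "time": <current_time in s>}, with a "redshift" key
    # added for cosmological datasets; outputs that fail to load are skipped.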

    def _write_cosmology_outputs(self, filename, outputs, start_index,
                                 decimals=3):
        r"""
        Write cosmology output parameters for a cosmology splice.
        """

        mylog.info("Writing redshift output list to %s.", filename)
        with open(filename, "w") as f:
            for output in outputs:
                f.write("%.*f\n" % (decimals, 1. / (1. + output["redshift"])))
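
    # Sketch of the file written above (illustrative only): for hypothetical
    # redshift outputs z = 3, 1, 0, each line holds the scale factor
    # a = 1 / (1 + z), i.e. 0.25, 0.5, and 1.0.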