Example #1
def test_ptype():
    n_particles = 100
    fields = ['particle_position_x', 'particle_position_y', 'particle_position_z', 'particle_index',
              'particle_dummy']
    negative = [False, False, False, False, False]
    units = ['cm', 'cm', 'cm', '1', '1']

    # Set up a filter on the 'particle_dummy' field, keeping only particles
    # with 'particle_dummy' <= n_particles // 2
    @particle_filter(name='dummy', requires=["particle_dummy"])
    def dummy(pfilter, data):
        return data[(pfilter.filtered_type, "particle_dummy")] <= n_particles // 2

    # Set up a fake particle dataset with repeated ids. This should still work
    # because the ids are unique among the filtered 'dummy' particles, so test
    # exactly that.
    data = {'particle_index': np.arange(n_particles) % (n_particles // 2),
            'particle_dummy': np.arange(n_particles)}
    all_ds = [fake_particle_ds(fields=fields, negative=negative, units=units,
                               npart=n_particles, data=data)]
    for ds in all_ds:
        ds.add_particle_filter('dummy')
    ts = DatasetSeries(all_ds)

    # Select all dummy particles
    print(ts[0].derived_field_list)
    ids = ts[0].all_data()['dummy', 'particle_index']

    # Build trajectories
    ts.particle_trajectories(ids, ptype='dummy')
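
    # A hedged follow-up check appended for illustration (not in the original
    # test): the filter keeps particles with 'particle_dummy' <= n_particles // 2,
    # so the filtered 'dummy' selection should never exceed the unfiltered 'all'
    # selection (assuming the fake dataset exposes the 'all' particle union).
    ad = ts[0].all_data()
    assert len(ad['dummy', 'particle_index']) <= len(ad['all', 'particle_index'])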
Example #2
def test_init_fake_dataseries():

    file_list = [f"fake_data_file_{str(i).zfill(4)}" for i in range(10)]
    with tempfile.TemporaryDirectory() as tmpdir:
        pfile_list = [Path(tmpdir) / file for file in file_list]
        sfile_list = [str(file) for file in pfile_list]
        for file in pfile_list:
            file.touch()
        pattern = Path(tmpdir) / "fake_data_file_*"

        # init from str pattern
        ts = DatasetSeries(pattern)
        assert ts._pre_outputs == sfile_list

        # init from Path pattern
        ppattern = Path(pattern)
        ts = DatasetSeries(ppattern)
        assert ts._pre_outputs == sfile_list

        # init from str list
        ts = DatasetSeries(sfile_list)
        assert ts._pre_outputs == sfile_list

        # init from Path list
        ts = DatasetSeries(pfile_list)
        assert ts._pre_outputs == pfile_list

        # rejected input type (str repr of a list) "[file1, file2, ...]"
        assert_raises(FileNotFoundError, DatasetSeries, str(file_list))

        # finally, check that ts[0] fails to actually load
        assert_raises(YTUnidentifiedDataType, ts.__getitem__, 0)
Example #3
def test_orbit_traj():
    fields = ["particle_velocity_x", "particle_velocity_y", "particle_velocity_z"]
    my_fns = glob.glob(os.path.join(data_path, "Orbit/orbit_hdf5_chk_00[0-9][0-9]"))
    my_fns.sort()
    ts = DatasetSeries(my_fns)
    ds = ts[0]
    traj = ts.particle_trajectories([1, 2], fields=fields, suppress_logging=True)
    for field in pfields + vfields:
        def field_func(name):
            # use the argument instead of the loop variable to avoid the
            # late-binding closure pitfall with yielded tests
            return traj[name]
        yield GenericArrayTest(ds, field_func, args=[field])
Example #4
def test_etc_traj():
    fields = ["particle_velocity_x", "particle_velocity_y", "particle_velocity_z"]
    my_fns = glob.glob(os.path.join(data_path, "enzo_tiny_cosmology/DD000[0-9]/*.hierarchy"))
    my_fns.sort()
    ts = DatasetSeries(my_fns)
    ds = ts[0]
    sp = ds.sphere("max", (0.5, "Mpc"))
    indices = sp["particle_index"][sp["particle_type"] == 1][:5]
    traj = ts.particle_trajectories(indices, fields=fields, suppress_logging=True)
    traj.add_fields(["density"])
    for field in pfields + vfields + ["density"]:
        def field_func(name):
            # use the argument instead of the loop variable to avoid the
            # late-binding closure pitfall with yielded tests
            return traj[name]
        yield GenericArrayTest(ds, field_func, args=[field])
Example #5
def test_pattern_expansion():
    file_list = [f"fake_data_file_{str(i).zfill(4)}" for i in range(10)]

    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)
        for file in file_list:
            (tmp_path / file).touch()

        pattern = tmp_path / "fake_data_file_*"
        expected = [str(tmp_path / file) for file in file_list]
        found = DatasetSeries._get_filenames_from_glob_pattern(pattern)
        assert found == expected

        found2 = DatasetSeries._get_filenames_from_glob_pattern(Path(pattern))
        assert found2 == expected
Example #6
def test_uniqueness():
    n_particles = 2
    n_steps = 2
    ids = np.arange(n_particles, dtype=int) % (n_particles // 2)
    data = {"particle_index": ids}
    fields = [
        "particle_position_x",
        "particle_position_y",
        "particle_position_z",
        "particle_index",
    ]
    negative = [False, False, False, False]
    units = ["cm", "cm", "cm", "1"]

    ts = DatasetSeries(
        [
            fake_particle_ds(
                fields=fields,
                negative=negative,
                units=units,
                npart=n_particles,
                data=data,
            )
            for i in range(n_steps)
        ]
    )

    assert_raises(YTIllDefinedParticleData, ts.particle_trajectories, [0])
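
# A hedged companion sketch (hypothetical helper, not part of the original
# test): with unique particle ids the same construction is expected to
# succeed instead of raising YTIllDefinedParticleData.
def _sketch_unique_ids_ok(n_particles=2, n_steps=2):
    fields = [
        "particle_position_x",
        "particle_position_y",
        "particle_position_z",
        "particle_index",
    ]
    negative = [False, False, False, False]
    units = ["cm", "cm", "cm", "1"]
    data = {"particle_index": np.arange(n_particles)}  # unique ids this time
    ts = DatasetSeries(
        [
            fake_particle_ds(
                fields=fields,
                negative=negative,
                units=units,
                npart=n_particles,
                data=data,
            )
            for _ in range(n_steps)
        ]
    )
    ts.particle_trajectories(list(range(n_particles)))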
Example #7
    def __init__(self, outputs, indices, fields=None, suppress_logging=False):

        indices.sort() # Just in case the caller wasn't careful
        self.field_data = YTFieldData()
        if isinstance(outputs, DatasetSeries):
            self.data_series = outputs
        else:
            self.data_series = DatasetSeries(outputs)
        self.masks = []
        self.sorts = []
        self.array_indices = []
        self.indices = indices
        self.num_indices = len(indices)
        self.num_steps = len(outputs)
        self.times = []
        self.suppress_logging = suppress_logging

        # Default fields
        if fields is None:
            fields = []
        fields.append("particle_position_x")
        fields.append("particle_position_y")
        fields.append("particle_position_z")
        fields = list(OrderedDict.fromkeys(fields))

        if self.suppress_logging:
            old_level = int(ytcfg.get("yt", "loglevel"))
            mylog.setLevel(40)
        my_storage = {}
        pbar = get_pbar("Constructing trajectory information", len(self.data_series))
        for i, (sto, ds) in enumerate(self.data_series.piter(storage=my_storage)):
            dd = ds.all_data()
            idx_field = dd._determine_fields("particle_index")[0]
            newtags = dd[idx_field].ndarray_view().astype("int64")
            mask = np.in1d(newtags, indices, assume_unique=True)
            sorts = np.argsort(newtags[mask])
            self.array_indices.append(np.where(np.in1d(indices, newtags, assume_unique=True))[0])
            self.masks.append(mask)
            self.sorts.append(sorts)
            sto.result_id = ds.parameter_filename
            sto.result = ds.current_time
            pbar.update(i)
        pbar.finish()

        if self.suppress_logging:
            mylog.setLevel(old_level)

        times = []
        for fn, time in sorted(my_storage.items()):
            times.append(time)

        self.times = self.data_series[0].arr([time for time in times], times[0].units)

        self.particle_fields = []

        # Instantiate fields the caller requested

        for field in fields:
            self._get_data(field)
Example #8
 def __getitem__(self, key):
     if isinstance(key, slice):
         if isinstance(key.start, float):
             return self.get_range(key.start, key.stop)
         # This will return a sliced up object!
         return DatasetSeries(self._pre_outputs[key], self.parallel)
     o = self._pre_outputs[key]
     fn, step = o
     o = load(fn, step=step)
     self._setup_function(o)
     return o
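
# A hedged usage sketch of the two branches above: slicing returns a new,
# smaller DatasetSeries without loading anything, while integer indexing
# loads and returns a single dataset. The throwaway files below are empty,
# so only the slicing branch is exercised here.
import tempfile
from pathlib import Path

def _sketch_getitem_branches():
    with tempfile.TemporaryDirectory() as tmpdir:
        for i in range(4):
            (Path(tmpdir) / f"fake_data_file_{i:04d}").touch()
        ts = DatasetSeries(str(Path(tmpdir) / "fake_data_file_*"))
        sliced = ts[::2]  # slice -> a new DatasetSeries
        assert isinstance(sliced, DatasetSeries)
        assert len(sliced._pre_outputs) == 2
        # ts[0] would call load() on an empty file and fail to identify it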
Example #9
 def __init__(self,
              ts,
              num_readers=1,
              num_writers=None,
              outbase="rockstar_halos",
              particle_type="all",
              force_res=None,
              total_particles=None,
              dm_only=False,
              particle_mass=None,
              min_halo_size=25):
     if is_root():
         mylog.info(
             "The citation for the Rockstar halo finder can be found at")
         mylog.info("http://adsabs.harvard.edu/abs/2013ApJ...762..109B")
     ParallelAnalysisInterface.__init__(self)
     # Decide how we're working.
     if ytcfg.getboolean("yt", "inline"):
         self.runner = InlineRunner()
     else:
         self.runner = StandardRunner(num_readers, num_writers)
     self.num_readers = self.runner.num_readers
     self.num_writers = self.runner.num_writers
     mylog.info("Rockstar is using %d readers and %d writers",
                self.num_readers, self.num_writers)
     # Note that Rockstar does not support subvolumes.
     # We assume that all of the snapshots in the time series
     # use the same domain info as the first snapshot.
     if not isinstance(ts, DatasetSeries):
         ts = DatasetSeries([ts])
     self.ts = ts
     self.particle_type = particle_type
     self.outbase = outbase
     self.min_halo_size = min_halo_size
     if force_res is None:
         tds = ts[-1]  # Cache a reference
         self.force_res = tds.index.get_smallest_dx().in_units("Mpc/h")
         # We have to delete now to wipe the index
         del tds
     else:
         self.force_res = force_res
     self.total_particles = total_particles
     self.dm_only = dm_only
     self.particle_mass = particle_mass
     # Setup pool and workgroups.
     self.pool, self.workgroup = self.runner.setup_pool()
     p = self._setup_parameters(ts)
     params = self.comm.mpi_bcast(p, root=self.pool['readers'].ranks[0])
     self.__dict__.update(params)
     self.handler = rockstar_interface.RockstarInterface(self.ts)
Example #10
def _fof_method(hc, **finder_kwargs):
    r"""
    Run the FoF halo finding method.
    """

    ds = hc.data_ds
    if isinstance(ds, DatasetSeries):
        ts = ds
    else:
        ts = DatasetSeries([ds])

    for my_ds in ts:
        halo_list = FOFHaloFinder(my_ds, **finder_kwargs)
        _parse_halo_list(hc, halo_list)
Example #11
 def _initialize_dataset(self, ts):
     if not isinstance(ts, DatasetSeries):
         if not iterable(ts): ts = [ts]
         ts = DatasetSeries(ts)
     return ts
Example #12
    def __init__(self, outputs, indices, fields=None, suppress_logging=False):

        indices.sort()  # Just in case the caller wasn't careful
        self.field_data = YTFieldData()
        if isinstance(outputs, DatasetSeries):
            self.data_series = outputs
        else:
            self.data_series = DatasetSeries(outputs)
        self.masks = []
        self.sorts = []
        self.array_indices = []
        self.indices = indices
        self.num_indices = len(indices)
        self.num_steps = len(outputs)
        self.times = []
        self.suppress_logging = suppress_logging

        # Default fields

        if fields is None: fields = []
        fields.append("particle_position_x")
        fields.append("particle_position_y")
        fields.append("particle_position_z")
        fields = list(OrderedDict.fromkeys(fields))

        if self.suppress_logging:
            old_level = int(ytcfg.get("yt", "loglevel"))
            mylog.setLevel(40)
        my_storage = {}
        pbar = get_pbar("Constructing trajectory information",
                        len(self.data_series))
        for i, (sto,
                ds) in enumerate(self.data_series.piter(storage=my_storage)):
            dd = ds.all_data()
            idx_field = dd._determine_fields("particle_index")[0]
            newtags = dd[idx_field].ndarray_view().astype("int64")
            mask = np.in1d(newtags, indices, assume_unique=True)
            sorts = np.argsort(newtags[mask])
            self.array_indices.append(
                np.where(np.in1d(indices, newtags, assume_unique=True))[0])
            self.masks.append(mask)
            self.sorts.append(sorts)
            sto.result_id = ds.parameter_filename
            sto.result = ds.current_time
            pbar.update(i)
        pbar.finish()

        if self.suppress_logging:
            mylog.setLevel(old_level)

        times = []
        for fn, time in sorted(my_storage.items()):
            times.append(time)

        self.times = self.data_series[0].arr([time for time in times],
                                             times[0].units)

        self.particle_fields = []

        # Instantiate fields the caller requested

        for field in fields:
            self._get_data(field)
Example #13
class ParticleTrajectories(object):
    r"""A collection of particle trajectories in time over a series of
    datasets. 

    The ParticleTrajectories object contains a collection of
    particle trajectories for a specified set of particle indices. 
    
    Parameters
    ----------
    outputs : `yt.data_objects.time_series.DatasetSeries` or list of strings
        DatasetSeries object, or a time-sorted list of filenames to
        construct a new DatasetSeries object.
    indices : array_like
        An integer array of particle indices whose trajectories we
        want to track. If they are not sorted they will be sorted.
    fields : list of strings, optional
        A set of fields that is retrieved when the trajectory
        collection is instantiated.
        Default : None (will default to the fields 'particle_position_x',
        'particle_position_y', 'particle_position_z')
    suppress_logging : boolean
        Suppress yt's logging when iterating over the simulation time
        series.
        Default : False

    Examples
    --------
    >>> from yt.mods import *
    >>> my_fns = glob.glob("orbit_hdf5_chk_00[0-9][0-9]")
    >>> my_fns.sort()
    >>> fields = ["particle_position_x", "particle_position_y",
    ...           "particle_position_z", "particle_velocity_x",
    ...           "particle_velocity_y", "particle_velocity_z"]
    >>> ds = load(my_fns[0])
    >>> init_sphere = ds.sphere(ds.domain_center, (.5, "unitary"))
    >>> indices = init_sphere["particle_index"].astype("int")
    >>> trajs = ParticleTrajectories(my_fns, indices, fields=fields)
    >>> for t in trajs:
    ...     print(t["particle_velocity_x"].max(), t["particle_velocity_x"].min())
    """
    def __init__(self, outputs, indices, fields=None, suppress_logging=False):

        indices.sort()  # Just in case the caller wasn't careful
        self.field_data = YTFieldData()
        if isinstance(outputs, DatasetSeries):
            self.data_series = outputs
        else:
            self.data_series = DatasetSeries(outputs)
        self.masks = []
        self.sorts = []
        self.array_indices = []
        self.indices = indices
        self.num_indices = len(indices)
        self.num_steps = len(outputs)
        self.times = []
        self.suppress_logging = suppress_logging

        # Default fields

        if fields is None: fields = []
        fields.append("particle_position_x")
        fields.append("particle_position_y")
        fields.append("particle_position_z")
        fields = list(OrderedDict.fromkeys(fields))

        if self.suppress_logging:
            old_level = int(ytcfg.get("yt", "loglevel"))
            mylog.setLevel(40)
        my_storage = {}
        pbar = get_pbar("Constructing trajectory information",
                        len(self.data_series))
        for i, (sto,
                ds) in enumerate(self.data_series.piter(storage=my_storage)):
            dd = ds.all_data()
            idx_field = dd._determine_fields("particle_index")[0]
            newtags = dd[idx_field].ndarray_view().astype("int64")
            mask = np.in1d(newtags, indices, assume_unique=True)
            sorts = np.argsort(newtags[mask])
            self.array_indices.append(
                np.where(np.in1d(indices, newtags, assume_unique=True))[0])
            self.masks.append(mask)
            self.sorts.append(sorts)
            sto.result_id = ds.parameter_filename
            sto.result = ds.current_time
            pbar.update(i)
        pbar.finish()

        if self.suppress_logging:
            mylog.setLevel(old_level)

        times = []
        for fn, time in sorted(my_storage.items()):
            times.append(time)

        self.times = self.data_series[0].arr([time for time in times],
                                             times[0].units)

        self.particle_fields = []

        # Instantiate fields the caller requested

        for field in fields:
            self._get_data(field)

    def has_key(self, key):
        return (key in self.field_data)

    def keys(self):
        return self.field_data.keys()

    def __getitem__(self, key):
        """
        Get the field associated with key.
        """
        if key == "particle_time":
            return self.times
        if key not in self.field_data:
            self._get_data(key)
        return self.field_data[key]

    def __setitem__(self, key, val):
        """
        Sets a field to be some other value.
        """
        self.field_data[key] = val

    def __delitem__(self, key):
        """
        Delete the field from the trajectory
        """
        del self.field_data[key]

    def __iter__(self):
        """
        This iterates over the trajectories for
        the different particles, returning dicts
        of fields for each trajectory
        """
        for idx in range(self.num_indices):
            traj = {}
            traj["particle_index"] = self.indices[idx]
            traj["particle_time"] = self.times
            for field in self.field_data.keys():
                traj[field] = self[field][idx, :]
            yield traj

    def __len__(self):
        """
        The number of individual trajectories
        """
        return self.num_indices

    def add_fields(self, fields):
        """
        Add a list of fields to an existing trajectory

        Parameters
        ----------
        fields : list of strings
            A list of fields to be added to the current trajectory
            collection.

        Examples
        --------
        >>> from yt.mods import *
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.add_fields(["particle_mass", "particle_gpot"])
        """
        for field in fields:
            if field not in self.field_data:
                self._get_data(field)

    def _get_data(self, field):
        """
        Get a field to include in the trajectory collection.
        The trajectory collection itself is a dict of 2D numpy arrays,
        with shape (num_indices, num_steps)
        """
        if field not in self.field_data:
            if self.suppress_logging:
                old_level = int(ytcfg.get("yt", "loglevel"))
                mylog.setLevel(40)
            ds_first = self.data_series[0]
            dd_first = ds_first.all_data()
            fd = dd_first._determine_fields(field)[0]
            if field not in self.particle_fields:
                if self.data_series[0].field_info[fd].particle_type:
                    self.particle_fields.append(field)
            particles = np.empty((self.num_indices, self.num_steps))
            particles[:] = np.nan
            step = int(0)
            pbar = get_pbar("Generating field %s in trajectories." % (field),
                            self.num_steps)
            my_storage = {}
            for i, (sto, ds) in enumerate(
                    self.data_series.piter(storage=my_storage)):
                mask = self.masks[i]
                sort = self.sorts[i]
                if field in self.particle_fields:
                    # This is easy... just get the particle fields
                    dd = ds.all_data()
                    pfield = dd[fd].ndarray_view()[mask][sort]
                else:
                    # This is hard... must loop over grids
                    pfield = np.zeros((self.num_indices))
                    x = self["particle_position_x"][:, step].ndarray_view()
                    y = self["particle_position_y"][:, step].ndarray_view()
                    z = self["particle_position_z"][:, step].ndarray_view()
                    # This will fail for non-grid index objects
                    particle_grids, particle_grid_inds = ds.index._find_points(
                        x, y, z)
                    for grid in particle_grids:
                        cube = grid.retrieve_ghost_zones(1, [fd])
                        CICSample_3(
                            x, y, z, pfield, self.num_indices, cube[fd],
                            np.array(grid.LeftEdge).astype(np.float64),
                            np.array(grid.ActiveDimensions).astype(np.int32),
                            grid.dds[0])
                sto.result_id = ds.parameter_filename
                sto.result = (self.array_indices[i], pfield)
                pbar.update(step)
                step += 1
            pbar.finish()
            for i, (fn, (indices,
                         pfield)) in enumerate(sorted(my_storage.items())):
                particles[indices, i] = pfield
            self.field_data[field] = array_like_field(dd_first, particles, fd)
            if self.suppress_logging:
                mylog.setLevel(old_level)
        return self.field_data[field]

    def trajectory_from_index(self, index):
        """
        Retrieve a single trajectory corresponding to a specific particle
        index

        Parameters
        ----------
        index : int
            This defines which particle trajectory from the
            ParticleTrajectories object will be returned.

        Returns
        -------
        A dictionary corresponding to the particle's trajectory and the
        fields along that trajectory

        Examples
        --------
        >>> from yt.mods import *
        >>> import matplotlib.pylab as pl
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> traj = trajs.trajectory_from_index(indices[0])
        >>> pl.plot(traj["particle_time"], traj["particle_position_x"], "-x")
        >>> pl.savefig("orbit")
        """
        mask = np.in1d(self.indices, (index,), assume_unique=True)
        if not np.any(mask):
            raise IndexError("The particle index %d is not in the list!" % index)
        fields = [field for field in sorted(self.field_data.keys())]
        traj = {}
        traj["particle_time"] = self.times
        traj["particle_index"] = index
        for field in fields:
            traj[field] = self[field][mask, :][0]
        return traj

    @parallel_root_only
    def write_out(self, filename_base):
        """
        Write out particle trajectories to tab-separated ASCII files (one
        for each trajectory) with the field names in the file header. Each
        file is named with a basename and the index number.

        Parameters
        ----------
        filename_base : string
            The prefix for the outputted ASCII files.

        Examples
        --------
        >>> from yt.mods import *
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.write_out("orbit_trajectory")       
        """
        fields = [field for field in sorted(self.field_data.keys())]
        num_fields = len(fields)
        first_str = "# particle_time\t" + "\t".join(fields) + "\n"
        template_str = "%g\t" * num_fields + "%g\n"
        for ix in range(self.num_indices):
            outlines = [first_str]
            for it in range(self.num_steps):
                outlines.append(
                    template_str %
                    tuple([self.times[it]] +
                          [self[field][ix, it] for field in fields]))
            with open(filename_base + "_%d.dat" % self.indices[ix], "w") as fid:
                fid.writelines(outlines)

    @parallel_root_only
    def write_out_h5(self, filename):
        """
        Write out all the particle trajectories to a single HDF5 file
        that contains the indices, the times, and the 2D array for each
        field individually

        Parameters
        ----------

        filename : string
            The output filename for the HDF5 file

        Examples
        --------

        >>> from yt.mods import *
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.write_out_h5("orbit_trajectories")                
        """
        fields = sorted(self.field_data.keys())
        with h5py.File(filename, "w") as fid:
            fid.create_dataset("particle_indices",
                               dtype=np.int32,
                               data=self.indices)
            fid.create_dataset("particle_time", data=self.times)
            for field in fields:
                fid.create_dataset(field, data=self[field])
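
# A hedged read-back sketch for the HDF5 layout written above (assumes h5py is
# available and that write_out_h5 has already produced "orbit_trajectories"):
# each field is stored as one 2D (num_indices, num_steps) dataset alongside the
# shared "particle_indices" and "particle_time" datasets.
import h5py

with h5py.File("orbit_trajectories", "r") as f:
    indices = f["particle_indices"][:]
    times = f["particle_time"][:]
    x = f["particle_position_x"][:]  # always present; it is a default field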
Example #14
    def get_time_series(self, time_data=True, redshift_data=True,
                        initial_time=None, final_time=None,
                        initial_redshift=None, final_redshift=None,
                        initial_cycle=None, final_cycle=None,
                        times=None, redshifts=None, tolerance=None,
                        parallel=True, setup_function=None):

        """
        Instantiate a DatasetSeries object for a set of outputs.

        If no additional keywords given, a DatasetSeries object will be
        created with all potential datasets created by the simulation.

        Outputs can be gathered by specifying a time or redshift range
        (or combination of time and redshift), with a specific list of
        times or redshifts, a range of cycle numbers (for cycle based
        output), or by simply searching all subdirectories within the
        simulation directory.

        time_data : bool
            Whether or not to include time outputs when gathering
            datasets for time series.
            Default: True.
        redshift_data : bool
            Whether or not to include redshift outputs when gathering
            datasets for time series.
            Default: True.
        initial_time : tuple of type (float, str)
            The earliest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (5.0, "Gyr").  If None, the initial time of the 
            simulation is used.  This can be used in combination with 
            either final_time or final_redshift.
            Default: None.
        final_time : tuple of type (float, str)
            The latest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (13.7, "Gyr"). If None, the final time of the 
            simulation is used.  This can be used in combination with either 
            initial_time or initial_redshift.
            Default: None.
        times : tuple of type (float array, str)
            A list of times for which outputs will be found and the units 
            of those values.  For example, ([0, 1, 2, 3], "s").
            Default: None.
        initial_redshift : float
            The earliest redshift for outputs to be included.  If None,
            the initial redshift of the simulation is used.  This can be
            used in combination with either final_time or
            final_redshift.
            Default: None.
        final_redshift : float
            The latest redshift for outputs to be included.  If None,
            the final redshift of the simulation is used.  This can be
            used in combination with either initial_time or
            initial_redshift.
            Default: None.
        redshifts : array_like
            A list of redshifts for which outputs will be found.
            Default: None.
        initial_cycle : float
            The earliest cycle for outputs to be included.  If None,
            the initial cycle of the simulation is used.  This can
            only be used with final_cycle.
            Default: None.
        final_cycle : float
            The latest cycle for outputs to be included.  If None,
            the final cycle of the simulation is used.  This can
            only be used in combination with initial_cycle.
            Default: None.
        tolerance : float
            Used in combination with "times" or "redshifts" keywords,
            this is the tolerance within which outputs are accepted
            given the requested times or redshifts.  If None, the
            nearest output is always taken.
            Default: None.
        parallel : bool/int
            If True, the generated DatasetSeries will divide the work
            such that a single processor works on each dataset.  If an
            integer is supplied, the work will be divided into that
            number of jobs.
            Default: True.
        setup_function : callable, accepts a ds
            This function will be called whenever a dataset is loaded.

        Examples
        --------

        >>> import yt
        >>> es = yt.simulation("my_simulation.par", "Enzo")
        
        >>> es.get_time_series(initial_redshift=10, final_time=(13.7, "Gyr"),
        ...                    redshift_data=False)

        >>> es.get_time_series(redshifts=[3, 2, 1, 0])

        >>> es.get_time_series(final_cycle=100000)

        >>> # after calling get_time_series
        >>> for ds in es.piter():
        ...     p = ProjectionPlot(ds, 'x', "density")
        ...     p.save()

        >>> # An example using the setup_function keyword
        >>> def print_time(ds):
        ...     print(ds.current_time)
        >>> es.get_time_series(setup_function=print_time)
        >>> for ds in es:
        ...     SlicePlot(ds, "x", "Density").save()

        """

        if (initial_redshift is not None or \
            final_redshift is not None) and \
            not self.cosmological_simulation:
            raise InvalidSimulationTimeSeries(
                "An initial or final redshift has been given for a " +
                "noncosmological simulation.")

        if time_data and redshift_data:
            my_all_outputs = self.all_outputs
        elif time_data:
            my_all_outputs = self.all_time_outputs
        elif redshift_data:
            my_all_outputs = self.all_redshift_outputs
        else:
            raise InvalidSimulationTimeSeries('Both time_data and redshift_data are False.')

        if not my_all_outputs:
            DatasetSeries.__init__(self, outputs=[], parallel=parallel)
            mylog.info("0 outputs loaded into time series.")
            return

        # Apply selection criteria to the set.
        if times is not None:
            my_outputs = self._get_outputs_by_key("time", times,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        elif redshifts is not None:
            my_outputs = self._get_outputs_by_key("redshift", redshifts,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        elif initial_cycle is not None or final_cycle is not None:
            if initial_cycle is None:
                initial_cycle = 0
            else:
                initial_cycle = max(initial_cycle, 0)
            if final_cycle is None:
                final_cycle = self.parameters['StopCycle']
            else:
                final_cycle = min(final_cycle, self.parameters['StopCycle'])

            my_outputs = my_all_outputs[int(ceil(float(initial_cycle) /
                                                 self.parameters['CycleSkipDataDump'])):
                                        (final_cycle // self.parameters['CycleSkipDataDump'])+1]

        else:
            if initial_time is not None:
                if isinstance(initial_time, float):
                    initial_time = self.quan(initial_time, "code_time")
                elif isinstance(initial_time, tuple) and len(initial_time) == 2:
                    initial_time = self.quan(*initial_time)
                elif not isinstance(initial_time, YTArray):
                    raise RuntimeError(
                        "Error: initial_time must be given as a float or " +
                        "tuple of (value, units).")
                # keep the converted value under the name used below; without
                # this, my_initial_time would be undefined in this branch
                my_initial_time = initial_time
            elif initial_redshift is not None:
                my_initial_time = self.cosmology.t_from_z(initial_redshift)
            else:
                my_initial_time = self.initial_time

            if final_time is not None:
                if isinstance(final_time, float):
                    final_time = self.quan(final_time, "code_time")
                elif isinstance(final_time, tuple) and len(final_time) == 2:
                    final_time = self.quan(*final_time)
                elif not isinstance(final_time, YTArray):
                    raise RuntimeError(
                        "Error: final_time must be given as a float or " +
                        "tuple of (value, units).")
                my_final_time = final_time.in_units("s")
            elif final_redshift is not None:
                my_final_time = self.cosmology.t_from_z(final_redshift)
            else:
                my_final_time = self.final_time

            my_initial_time.convert_to_units("s")
            my_final_time.convert_to_units("s")
            my_times = np.array([a['time'] for a in my_all_outputs])
            my_indices = np.digitize([my_initial_time, my_final_time], my_times)
            if my_initial_time == my_times[my_indices[0] - 1]: my_indices[0] -= 1
            my_outputs = my_all_outputs[my_indices[0]:my_indices[1]]

        init_outputs = []
        for output in my_outputs:
            if os.path.exists(output['filename']):
                init_outputs.append(output['filename'])
            
        DatasetSeries.__init__(self, outputs=init_outputs, parallel=parallel,
                                setup_function=setup_function)
        mylog.info("%d outputs loaded into time series.", len(init_outputs))
Example #15
def load(*args, **kwargs):
    """
    This function attempts to determine the base data type of a filename or
    other set of arguments by calling
    :meth:`yt.data_objects.static_output.Dataset._is_valid` until it finds a
    match, at which point it returns an instance of the appropriate
    :class:`yt.data_objects.static_output.Dataset` subclass.
    """
    args = _sanitize_load_args(*args)
    candidates = []
    valid_file = []
    for argno, arg in enumerate(args):
        if isinstance(arg, str):
            if os.path.exists(arg):
                valid_file.append(True)
            elif arg.startswith("http"):
                valid_file.append(True)
            else:
                if os.path.exists(os.path.join(ytcfg.get("yt", "test_data_dir"), arg)):
                    valid_file.append(True)
                    args[argno] = os.path.join(ytcfg.get("yt", "test_data_dir"), arg)
                else:
                    valid_file.append(False)
        else:
            valid_file.append(False)
    types_to_check = output_type_registry
    if not any(valid_file):
        try:
            from yt.data_objects.time_series import DatasetSeries
            ts = DatasetSeries.from_filenames(*args, **kwargs)
            return ts
        except (TypeError, YTOutputNotIdentified):
            pass
        # We check if either the first argument is a dict or list, in which
        # case we try identifying candidates.
        if len(args) > 0 and isinstance(args[0], (list, dict)):
            # This fixes issues where it is assumed the first argument is a
            # file
            types_to_check = dict((n, v) for n, v in
                    output_type_registry.items() if n.startswith("stream_"))
            # Better way to do this is to override the output_type_registry
        else:
            mylog.error("None of the arguments provided to load() is a valid file")
            mylog.error("Please check that you have used a correct path")
            raise YTOutputNotIdentified(args, kwargs)
    for n, c in types_to_check.items():
        if n is None: continue
        if c._is_valid(*args, **kwargs): candidates.append(n)

    # convert to classes
    candidates = [output_type_registry[c] for c in candidates]
    # Find only the lowest subclasses, i.e. most specialised front ends
    candidates = find_lowest_subclasses(candidates)
    if len(candidates) == 1:
        return candidates[0](*args, **kwargs)
    if len(candidates) == 0:
        if ytcfg.get("yt", "enzo_db") != '' \
           and len(args) == 1 \
           and isinstance(args[0], str):
            erdb = EnzoRunDatabase()
            fn = erdb.find_uuid(args[0])
            n = "EnzoDataset"
            if n in output_type_registry \
               and output_type_registry[n]._is_valid(fn):
                return output_type_registry[n](fn)
        mylog.error("Couldn't figure out output type for %s", args[0])
        raise YTOutputNotIdentified(args, kwargs)

    mylog.error("Multiple output type candidates for %s:", args[0])
    for c in candidates:
        mylog.error("    Possible: %s", c)
    raise YTOutputNotIdentified(args, kwargs)
Example #16
def load(fn, *args, **kwargs):
    """
    Load a Dataset or DatasetSeries object.
    The data format is automatically discovered, and the exact return type is the
    corresponding subclass of :class:`yt.data_objects.static_output.Dataset`.
    A :class:`yt.data_objects.time_series.DatasetSeries` is created if the first
    argument is a pattern.

    Parameters
    ----------
    fn : str, os.PathLike, or bytes (types supported by os.path.expanduser)
        A path to the data location. This can be a file name, directory name, a glob
        pattern, or a url (for data types that support it).

    Additional arguments, if any, are passed down to the return class.

    Returns
    -------
    :class:`yt.data_objects.static_output.Dataset` object
        If fn is a single path, create a Dataset from the appropriate subclass.

    :class:`yt.data_objects.time_series.DatasetSeries`
        If fn is a glob pattern (i.e. containing wildcards '[]?!*'), create a series.

    Raises
    ------
    FileNotFoundError
        If fn does not match any existing file or directory.

    yt.utilities.exceptions.YTUnidentifiedDataType
        If fn matches existing files or directories with undetermined format.

    yt.utilities.exceptions.YTAmbiguousDataType
        If the data format matches more than one class of similar specialization levels.
    """
    fn = os.path.expanduser(fn)

    if any(wildcard in fn for wildcard in "[]?!*"):
        from yt.data_objects.time_series import DatasetSeries

        return DatasetSeries(fn, *args, **kwargs)

    # Unless the path starts with http,
    # look for it as given or relative to the test data dir (in this order).
    if not (os.path.exists(fn) or fn.startswith("http")):
        data_dir = ytcfg.get("yt", "test_data_dir")
        alt_fn = os.path.join(data_dir, fn)
        if os.path.exists(alt_fn):
            fn = alt_fn
        else:
            msg = f"No such file or directory: '{fn}'."
            if os.path.exists(data_dir):
                msg += f"\n(Also tried '{alt_fn}')."
            raise FileNotFoundError(msg)

    candidates = []
    for cls in output_type_registry.values():
        if cls._is_valid(fn, *args, **kwargs):
            candidates.append(cls)

    # Find only the lowest subclasses, i.e. most specialised front ends
    candidates = find_lowest_subclasses(candidates)

    if len(candidates) == 1:
        return candidates[0](fn, *args, **kwargs)

    if len(candidates) > 1:
        raise YTAmbiguousDataType(fn, candidates)

    raise YTUnidentifiedDataType(fn, *args, **kwargs)
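
# A hedged usage sketch of the branching above, using throwaway paths so that
# only the wildcard and error-handling branches are exercised (no real dataset
# is required).
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmpdir:
    for i in range(3):
        (Path(tmpdir) / f"output_{i:04d}").touch()
    # a glob pattern short-circuits into a DatasetSeries; loading is deferred
    series = load(str(Path(tmpdir) / "output_00??"))
    # a plain path that matches nothing on disk raises FileNotFoundError
    try:
        load(str(Path(tmpdir) / "no_such_output"))
    except FileNotFoundError:
        pass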
Example #17
def test_init_fake_dataseries():

    file_list = [f"fake_data_file_{str(i).zfill(4)}" for i in range(10)]
    with tempfile.TemporaryDirectory() as tmpdir:
        pfile_list = [Path(tmpdir) / file for file in file_list]
        sfile_list = [str(file) for file in pfile_list]
        for file in pfile_list:
            file.touch()
        pattern = Path(tmpdir) / "fake_data_file_*"

        # init from str pattern
        ts = DatasetSeries(pattern)
        assert ts._pre_outputs == sfile_list

        # init from Path pattern
        ppattern = Path(pattern)
        ts = DatasetSeries(ppattern)
        assert ts._pre_outputs == sfile_list

        # init from str list
        ts = DatasetSeries(sfile_list)
        assert ts._pre_outputs == sfile_list

        # init from Path list
        ts = DatasetSeries(pfile_list)
        assert ts._pre_outputs == pfile_list

        # rejected input type (str repr of a list) "[file1, file2, ...]"
        assert_raises(FileNotFoundError, DatasetSeries, str(file_list))

        # finally, check that ts[0] fails to actually load
        assert_raises(YTUnidentifiedDataType, ts.__getitem__, 0)

        class FakeDataset(Dataset):
            """A minimal loadable fake dataset subclass"""
            @classmethod
            def _is_valid(cls, *args, **kwargs):
                return True

            def _parse_parameter_file(self):
                return

            def _set_code_unit_attributes(self):
                return

            def set_code_units(self):
                self.current_time = 0
                return

            def _hash(self):
                return

            def _setup_classes(self):
                return

        try:
            ds = DatasetSeries(pattern)[0]
            assert isinstance(ds, FakeDataset)

            ts = DatasetSeries(pattern, my_unsupported_kwarg=None)

            assert_raises(TypeError, ts.__getitem__, 0)
            # the exact error message is supposed to be this
            # """__init__() got an unexpected keyword argument 'my_unsupported_kwarg'"""
            # but it's hard to check for within the framework
        finally:
            # tear down to avoid possible breakage in following tests
            output_type_registry.pop("FakeDataset")
Example #18
    def get_time_series(
        self,
        time_data=True,
        redshift_data=True,
        initial_time=None,
        final_time=None,
        initial_redshift=None,
        final_redshift=None,
        initial_cycle=None,
        final_cycle=None,
        times=None,
        redshifts=None,
        tolerance=None,
        parallel=True,
        setup_function=None,
    ):

        """
        Instantiate a DatasetSeries object for a set of outputs.

        If no additional keywords given, a DatasetSeries object will be
        created with all potential datasets created by the simulation.

        Outputs can be gathered by specifying a time or redshift range
        (or combination of time and redshift), with a specific list of
        times or redshifts, a range of cycle numbers (for cycle based
        output), or by simply searching all subdirectories within the
        simulation directory.

        time_data : bool
            Whether or not to include time outputs when gathering
            datasets for time series.
            Default: True.
        redshift_data : bool
            Whether or not to include redshift outputs when gathering
            datasets for time series.
            Default: True.
        initial_time : tuple of type (float, str)
            The earliest time for outputs to be included.  This should be
            given as the value and the string representation of the units.
            For example, (5.0, "Gyr").  If None, the initial time of the
            simulation is used.  This can be used in combination with
            either final_time or final_redshift.
            Default: None.
        final_time : tuple of type (float, str)
            The latest time for outputs to be included.  This should be
            given as the value and the string representation of the units.
            For example, (13.7, "Gyr"). If None, the final time of the
            simulation is used.  This can be used in combination with either
            initial_time or initial_redshift.
            Default: None.
        times : tuple of type (float array, str)
            A list of times for which outputs will be found and the units
            of those values.  For example, ([0, 1, 2, 3], "s").
            Default: None.
        initial_redshift : float
            The earliest redshift for outputs to be included.  If None,
            the initial redshift of the simulation is used.  This can be
            used in combination with either final_time or
            final_redshift.
            Default: None.
        final_redshift : float
            The latest redshift for outputs to be included.  If None,
            the final redshift of the simulation is used.  This can be
            used in combination with either initial_time or
            initial_redshift.
            Default: None.
        redshifts : array_like
            A list of redshifts for which outputs will be found.
            Default: None.
        initial_cycle : float
            The earliest cycle for outputs to be included.  If None,
            the initial cycle of the simulation is used.  This can
            only be used with final_cycle.
            Default: None.
        final_cycle : float
            The latest cycle for outputs to be included.  If None,
            the final cycle of the simulation is used.  This can
            only be used in combination with initial_cycle.
            Default: None.
        tolerance : float
            Used in combination with "times" or "redshifts" keywords,
            this is the tolerance within which outputs are accepted
            given the requested times or redshifts.  If None, the
            nearest output is always taken.
            Default: None.
        parallel : bool/int
            If True, the generated DatasetSeries will divide the work
            such that a single processor works on each dataset.  If an
            integer is supplied, the work will be divided into that
            number of jobs.
            Default: True.
        setup_function : callable, accepts a ds
            This function will be called whenever a dataset is loaded.

        Examples
        --------

        >>> import yt
        >>> es = yt.load_simulation("enzo_tiny_cosmology/32Mpc_32.enzo", "Enzo")
        >>> es.get_time_series(
        ...     initial_redshift=10, final_time=(13.7, "Gyr"), redshift_data=False
        ... )
        >>> for ds in es:
        ...     print(ds.current_time)
        >>> es.get_time_series(redshifts=[3, 2, 1, 0])
        >>> for ds in es:
        ...     print(ds.current_time)

        """

        if (
            initial_redshift is not None or final_redshift is not None
        ) and not self.cosmological_simulation:
            raise InvalidSimulationTimeSeries(
                "An initial or final redshift has been given for a "
                + "noncosmological simulation."
            )

        if time_data and redshift_data:
            my_all_outputs = self.all_outputs
        elif time_data:
            my_all_outputs = self.all_time_outputs
        elif redshift_data:
            my_all_outputs = self.all_redshift_outputs
        else:
            raise InvalidSimulationTimeSeries(
                "Both time_data and redshift_data are False."
            )

        if not my_all_outputs:
            DatasetSeries.__init__(self, outputs=[], parallel=parallel)
            mylog.info("0 outputs loaded into time series.")
            return

        # Apply selection criteria to the set.
        if times is not None:
            my_outputs = self._get_outputs_by_key(
                "time", times, tolerance=tolerance, outputs=my_all_outputs
            )

        elif redshifts is not None:
            my_outputs = self._get_outputs_by_key(
                "redshift", redshifts, tolerance=tolerance, outputs=my_all_outputs
            )

        elif initial_cycle is not None or final_cycle is not None:
            if initial_cycle is None:
                initial_cycle = 0
            else:
                initial_cycle = max(initial_cycle, 0)
            if final_cycle is None:
                final_cycle = self.parameters["StopCycle"]
            else:
                final_cycle = min(final_cycle, self.parameters["StopCycle"])

            my_outputs = my_all_outputs[
                int(
                    np.ceil(float(initial_cycle) / self.parameters["CycleSkipDataDump"])
                ) : final_cycle // self.parameters["CycleSkipDataDump"]
                + 1
            ]

        else:
            if initial_time is not None:
                if isinstance(initial_time, float):
                    my_initial_time = self.quan(initial_time, "code_time")
                elif isinstance(initial_time, tuple) and len(initial_time) == 2:
                    my_initial_time = self.quan(*initial_time)
                elif not isinstance(initial_time, unyt_array):
                    raise RuntimeError(
                        "Error: initial_time must be given as a float or "
                        + "tuple of (value, units)."
                    )
            elif initial_redshift is not None:
                my_initial_time = self.cosmology.t_from_z(initial_redshift)
            else:
                my_initial_time = self.initial_time

            if final_time is not None:
                if isinstance(final_time, float):
                    my_final_time = self.quan(final_time, "code_time")
                elif isinstance(final_time, tuple) and len(final_time) == 2:
                    my_final_time = self.quan(*final_time)
                elif not isinstance(final_time, unyt_array):
                    raise RuntimeError(
                        "Error: final_time must be given as a float or "
                        + "tuple of (value, units)."
                    )
            elif final_redshift is not None:
                my_final_time = self.cosmology.t_from_z(final_redshift)
            else:
                my_final_time = self.final_time

            my_initial_time.convert_to_units("s")
            my_final_time.convert_to_units("s")
            my_times = np.array([a["time"] for a in my_all_outputs])
            my_indices = np.digitize([my_initial_time, my_final_time], my_times)
            if my_initial_time == my_times[my_indices[0] - 1]:
                my_indices[0] -= 1
            my_outputs = my_all_outputs[my_indices[0] : my_indices[1]]

        init_outputs = []
        for output in my_outputs:
            if os.path.exists(output["filename"]):
                init_outputs.append(output["filename"])

        DatasetSeries.__init__(
            self, outputs=init_outputs, parallel=parallel, setup_function=setup_function
        )
        mylog.info("%d outputs loaded into time series.", len(init_outputs))
Example #19
def load(fn, *args, **kwargs):
    """
    Load a Dataset or DatasetSeries object.
    The data format is automatically discovered, and the exact return type is the
    corresponding subclass of :class:`yt.data_objects.static_output.Dataset`.
    A :class:`yt.data_objects.time_series.DatasetSeries` is created if the first
    argument is a pattern.

    Parameters
    ----------
    fn : str, os.PathLike, or bytes (types supported by os.path.expanduser)
        A path to the data location. This can be a file name, directory name, a glob
        pattern, or a url (for data types that support it).

    Additional arguments, if any, are passed down to the return class.

    Returns
    -------
    :class:`yt.data_objects.static_output.Dataset` object
        If fn is a single path, create a Dataset from the appropriate subclass.

    :class:`yt.data_objects.time_series.DatasetSeries`
        If fn is a glob pattern (i.e. containing wildcards '[]?!*'), create a series.

    Raises
    ------
    FileNotFoundError
        If fn does not match any existing file or directory.

    yt.utilities.exceptions.YTUnidentifiedDataType
        If fn matches existing files or directories with undetermined format.

    yt.utilities.exceptions.YTAmbiguousDataType
        If the data format matches more than one class of similar specialization levels.
    """
    fn = os.path.expanduser(fn)

    if any(wildcard in fn for wildcard in "[]?!*"):
        from yt.data_objects.time_series import DatasetSeries

        return DatasetSeries(fn, *args, **kwargs)

    # This will raise FileNotFoundError if the path isn't matched
    # either in the current dir or yt.config.ytcfg['data_dir_directory']
    if not fn.startswith("http"):
        fn = str(lookup_on_disk_data(fn))

    candidates = []
    for cls in output_type_registry.values():
        if cls._is_valid(fn, *args, **kwargs):
            candidates.append(cls)

    # Find only the lowest subclasses, i.e. most specialised front ends
    candidates = find_lowest_subclasses(candidates)

    if len(candidates) == 1:
        return candidates[0](fn, *args, **kwargs)

    if len(candidates) > 1:
        raise YTAmbiguousDataType(fn, candidates)

    raise YTUnidentifiedDataType(fn, *args, **kwargs)
Example #20
def load(*args, **kwargs):
    """
    This function attempts to determine the base data type of a filename or
    other set of arguments by calling
    :meth:`yt.data_objects.api.Dataset._is_valid` until it finds a
    match, at which point it returns an instance of the appropriate
    :class:`yt.data_objects.api.Dataset` subclass.
    """
    if len(args) == 0:
        try:
            import tkinter
            from tkinter import filedialog as tkFileDialog
        except ImportError:
            raise YTOutputNotIdentified(args, kwargs)
        root = tkinter.Tk()
        filename = tkFileDialog.askopenfilename(parent=root, title='Choose a file')
        if filename is not None:
            return load(filename)
        else:
            raise YTOutputNotIdentified(args, kwargs)
    candidates = []
    args = [os.path.expanduser(arg) if isinstance(arg, str)
            else arg for arg in args]
    valid_file = []
    for argno, arg in enumerate(args):
        if isinstance(arg, str):
            if os.path.exists(arg):
                valid_file.append(True)
            elif arg.startswith("http"):
                valid_file.append(True)
            else:
                if os.path.exists(os.path.join(ytcfg.get("yt", "test_data_dir"), arg)):
                    valid_file.append(True)
                    args[argno] = os.path.join(ytcfg.get("yt", "test_data_dir"), arg)
                else:
                    valid_file.append(False)
        else:
            valid_file.append(False)
    if not any(valid_file):
        try:
            from yt.data_objects.time_series import DatasetSeries
            ts = DatasetSeries.from_filenames(*args, **kwargs)
            return ts
        except YTOutputNotIdentified:
            pass
        mylog.error("None of the arguments provided to load() is a valid file")
        mylog.error("Please check that you have used a correct path")
        raise YTOutputNotIdentified(args, kwargs)
    for n, c in output_type_registry.items():
        if n is None: continue
        if c._is_valid(*args, **kwargs): candidates.append(n)

    # convert to classes
    candidates = [output_type_registry[c] for c in candidates]
    # Find only the lowest subclasses, i.e. most specialised front ends
    candidates = find_lowest_subclasses(candidates)
    if len(candidates) == 1:
        return candidates[0](*args, **kwargs)
    if len(candidates) == 0:
        if ytcfg.get("yt", "enzo_db") != '' \
           and len(args) == 1 \
           and isinstance(args[0], str):
            erdb = EnzoRunDatabase()
            fn = erdb.find_uuid(args[0])
            n = "EnzoDataset"
            if n in output_type_registry \
               and output_type_registry[n]._is_valid(fn):
                return output_type_registry[n](fn)
        mylog.error("Couldn't figure out output type for %s", args[0])
        raise YTOutputNotIdentified(args, kwargs)

    mylog.error("Multiple output type candidates for %s:", args[0])
    for c in candidates:
        mylog.error("    Possible: %s", c)
    raise YTOutputNotIdentified(args, kwargs)
Example #21
def read_yt(filename):
    ds = DatasetSeries(filename)
    return YTGlueData(ds)
Example #22
    def __init__(self, outputs, indices, fields=None, suppress_logging=False):

        indices.sort()  # Just in case the caller wasn't careful
        self.field_data = YTFieldData()
        if isinstance(outputs, DatasetSeries):
            self.data_series = outputs
        else:
            self.data_series = DatasetSeries(outputs)
        self.masks = []
        self.sorts = []
        self.array_indices = []
        self.indices = indices
        self.num_indices = len(indices)
        self.num_steps = len(outputs)
        self.times = []
        self.suppress_logging = suppress_logging

        if fields is None: fields = []
        fields = list(OrderedDict.fromkeys(fields))

        if self.suppress_logging:
            old_level = int(ytcfg.get("yt", "loglevel"))
            mylog.setLevel(40)

        fds = {}
        ds_first = self.data_series[0]
        dd_first = ds_first.all_data()
        idx_field = dd_first._determine_fields("particle_index")[0]
        for field in ("particle_position_%s" % ax for ax in "xyz"):
            fds[field] = dd_first._determine_fields(field)[0]

        my_storage = {}
        pbar = get_pbar("Constructing trajectory information",
                        len(self.data_series))
        for i, (sto,
                ds) in enumerate(self.data_series.piter(storage=my_storage)):
            dd = ds.all_data()
            newtags = dd[idx_field].ndarray_view().astype("int64")
            mask = np.in1d(newtags, indices, assume_unique=True)
            sort = np.argsort(newtags[mask])
            array_indices = np.where(
                np.in1d(indices, newtags, assume_unique=True))[0]
            self.array_indices.append(array_indices)
            self.masks.append(mask)
            self.sorts.append(sort)

            pfields = {}
            for field in ("particle_position_%s" % ax for ax in "xyz"):
                pfields[field] = dd[fds[field]].ndarray_view()[mask][sort]

            sto.result_id = ds.parameter_filename
            sto.result = (ds.current_time, array_indices, pfields)
            pbar.update(i)
        pbar.finish()

        if self.suppress_logging:
            mylog.setLevel(old_level)

        times = []
        for fn, (time, indices, pfields) in sorted(my_storage.items()):
            times.append(time)
        self.times = self.data_series[0].arr([time for time in times],
                                             times[0].units)

        self.particle_fields = []
        output_field = np.empty((self.num_indices, self.num_steps))
        output_field.fill(np.nan)
        for field in ("particle_position_%s" % ax for ax in "xyz"):
            for i, (fn, (time, indices,
                         pfields)) in enumerate(sorted(my_storage.items())):
                output_field[indices, i] = pfields[field]
            self.field_data[field] = array_like_field(dd_first,
                                                      output_field.copy(),
                                                      fds[field])
            self.particle_fields.append(field)

        # Instantiate fields the caller requested
        self._get_data(fields)
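The mask/sort/array_indices bookkeeping in the loop above is compact, so here is a small self-contained sketch with made-up particle ids showing how one snapshot's field values land in the right rows of the (num_indices, num_steps) output array:

import numpy as np

indices = np.array([2, 5, 7, 11])        # requested ids, already sorted
newtags = np.array([7, 3, 11, 2, 9])     # ids present in one snapshot
mask = np.in1d(newtags, indices, assume_unique=True)
sort = np.argsort(newtags[mask])         # reorder the selected rows by id
found = np.where(np.in1d(indices, newtags, assume_unique=True))[0]

field_vals = np.array([70., 30., 110., 20., 90.])  # one value per snapshot row
row = np.full(len(indices), np.nan)
row[found] = field_vals[mask][sort]      # -> [ 20.,  nan,  70., 110.]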
class ParticleTrajectories(object):
    r"""A collection of particle trajectories in time over a series of
    datasets. 

    The ParticleTrajectories object contains a collection of
    particle trajectories for a specified set of particle indices. 
    
    Parameters
    ----------
    outputs : `yt.data_objects.time_series.DatasetSeries` or list of strings
        DatasetSeries object, or a time-sorted list of filenames to
        construct a new DatasetSeries object.
    indices : array_like
        An integer array of particle indices whose trajectories we
        want to track. If they are not sorted they will be sorted.
    fields : list of strings, optional
        A set of fields that is retrieved when the trajectory
        collection is instantiated.
        Default : None (will default to the fields 'particle_position_x',
        'particle_position_y', 'particle_position_z')
    suppress_logging : boolean
        Suppress yt's logging when iterating over the simulation time
        series.
        Default : False

    Examples
    --------
    >>> from yt.mods import *
    >>> my_fns = glob.glob("orbit_hdf5_chk_00[0-9][0-9]")
    >>> my_fns.sort()
    >>> fields = ["particle_position_x", "particle_position_y",
    >>>           "particle_position_z", "particle_velocity_x",
    >>>           "particle_velocity_y", "particle_velocity_z"]
    >>> ds = load(my_fns[0])
    >>> init_sphere = ds.sphere(ds.domain_center, (.5, "unitary"))
    >>> indices = init_sphere["particle_index"].astype("int")
    >>> trajs = ParticleTrajectories(my_fns, indices, fields=fields)
    >>> for t in trajs:
    ...     print(t["particle_velocity_x"].max(), t["particle_velocity_x"].min())
    """
    def __init__(self, outputs, indices, fields=None, suppress_logging=False):

        indices.sort() # Just in case the caller wasn't careful
        self.field_data = YTFieldData()
        if isinstance(outputs, DatasetSeries):
            self.data_series = outputs
        else:
            self.data_series = DatasetSeries(outputs)
        self.masks = []
        self.sorts = []
        self.array_indices = []
        self.indices = indices
        self.num_indices = len(indices)
        self.num_steps = len(outputs)
        self.times = []
        self.suppress_logging = suppress_logging

        # Default fields
        if fields is None: fields = []
        fields.append("particle_position_x")
        fields.append("particle_position_y")
        fields.append("particle_position_z")
        fields = list(OrderedDict.fromkeys(fields))

        if self.suppress_logging:
            old_level = int(ytcfg.get("yt","loglevel"))
            mylog.setLevel(40)
        my_storage = {}
        pbar = get_pbar("Constructing trajectory information", len(self.data_series))
        for i, (sto, ds) in enumerate(self.data_series.piter(storage=my_storage)):
            dd = ds.all_data()
            idx_field = dd._determine_fields("particle_index")[0]
            newtags = dd[idx_field].ndarray_view().astype("int64")
            mask = np.in1d(newtags, indices, assume_unique=True)
            sorts = np.argsort(newtags[mask])
            self.array_indices.append(np.where(np.in1d(indices, newtags, assume_unique=True))[0])
            self.masks.append(mask)
            self.sorts.append(sorts)
            sto.result_id = ds.parameter_filename
            sto.result = ds.current_time
            pbar.update(i)
        pbar.finish()

        if self.suppress_logging:
            mylog.setLevel(old_level)

        times = []
        for fn, time in sorted(my_storage.items()):
            times.append(time)

        self.times = self.data_series[0].arr([time for time in times], times[0].units)

        self.particle_fields = []

        # Instantiate fields the caller requested

        for field in fields:
            self._get_data(field)

    def has_key(self, key):
        return (key in self.field_data)
    
    def keys(self):
        return self.field_data.keys()

    def __getitem__(self, key):
        """
        Get the field associated with key.
        """
        if key == "particle_time":
            return self.times
        if key not in self.field_data:
            self._get_data(key)
        return self.field_data[key]
    
    def __setitem__(self, key, val):
        """
        Sets a field to be some other value.
        """
        self.field_data[key] = val
                        
    def __delitem__(self, key):
        """
        Delete the field from the trajectory
        """
        del self.field_data[key]

    def __iter__(self):
        """
        This iterates over the trajectories for
        the different particles, returning dicts
        of fields for each trajectory
        """
        for idx in range(self.num_indices):
            traj = {}
            traj["particle_index"] = self.indices[idx]
            traj["particle_time"] = self.times
            for field in self.field_data.keys():
                traj[field] = self[field][idx,:]
            yield traj
            
    def __len__(self):
        """
        The number of individual trajectories
        """
        return self.num_indices

    def add_fields(self, fields):
        """
        Add a list of fields to an existing trajectory

        Parameters
        ----------
        fields : list of strings
            A list of fields to be added to the current trajectory
            collection.

        Examples
        --------
        >>> from yt.mods import *
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.add_fields(["particle_mass", "particle_gpot"])
        """
        for field in fields:
            if field not in self.field_data:
                self._get_data(field)
                
    def _get_data(self, field):
        """
        Get a field to include in the trajectory collection.
        The trajectory collection itself is a dict of 2D numpy arrays,
        with shape (num_indices, num_steps)
        """
        if field not in self.field_data:
            if self.suppress_logging:
                old_level = int(ytcfg.get("yt","loglevel"))
                mylog.setLevel(40)
            ds_first = self.data_series[0]
            dd_first = ds_first.all_data()
            fd = dd_first._determine_fields(field)[0]
            if field not in self.particle_fields:
                if self.data_series[0].field_info[fd].particle_type:
                    self.particle_fields.append(field)
            particles = np.empty((self.num_indices,self.num_steps))
            particles[:] = np.nan
            step = int(0)
            pbar = get_pbar("Generating field %s in trajectories." % (field), self.num_steps)
            my_storage={}
            for i, (sto, ds) in enumerate(self.data_series.piter(storage=my_storage)):
                mask = self.masks[i]
                sort = self.sorts[i]
                if field in self.particle_fields:
                    # This is easy... just get the particle fields
                    dd = ds.all_data()
                    pfield = dd[fd].ndarray_view()[mask][sort]
                else:
                    # This is hard... must loop over grids
                    pfield = np.zeros((self.num_indices))
                    x = self["particle_position_x"][:,step].ndarray_view()
                    y = self["particle_position_y"][:,step].ndarray_view()
                    z = self["particle_position_z"][:,step].ndarray_view()
                    # This will fail for non-grid index objects
                    particle_grids, particle_grid_inds = ds.index._find_points(x,y,z)
                    for grid in particle_grids:
                        cube = grid.retrieve_ghost_zones(1, [fd])
                        CICSample_3(x,y,z,pfield,
                                    self.num_indices,
                                    cube[fd],
                                    np.array(grid.LeftEdge).astype(np.float64),
                                    np.array(grid.ActiveDimensions).astype(np.int32),
                                    grid.dds[0])
                sto.result_id = ds.parameter_filename
                sto.result = (self.array_indices[i], pfield)
                pbar.update(step)
                step += 1
            pbar.finish()
            for i, (fn, (indices, pfield)) in enumerate(sorted(my_storage.items())):
                particles[indices,i] = pfield
            self.field_data[field] = array_like_field(dd_first, particles, fd)
            if self.suppress_logging:
                mylog.setLevel(old_level)
        return self.field_data[field]

    def trajectory_from_index(self, index):
        """
        Retrieve a single trajectory corresponding to a specific particle
        index

        Parameters
        ----------
        index : int
            This defines which particle trajectory from the
            ParticleTrajectories object will be returned.

        Returns
        -------
        A dictionary corresponding to the particle's trajectory and the
        fields along that trajectory

        Examples
        --------
        >>> from yt.mods import *
        >>> import matplotlib.pylab as pl
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> traj = trajs.trajectory_from_index(indices[0])
        >>> pl.plot(traj["particle_time"], traj["particle_position_x"], "-x")
        >>> pl.savefig("orbit")
        """
        mask = np.in1d(self.indices, (index,), assume_unique=True)
        if not np.any(mask):
            print("The particle index %d is not in the list!" % (index))
            raise IndexError
        fields = [field for field in sorted(self.field_data.keys())]
        traj = {}
        traj["particle_time"] = self.times
        traj["particle_index"] = index
        for field in fields:
            traj[field] = self[field][mask,:][0]
        return traj

    @parallel_root_only
    def write_out(self, filename_base):
        """
        Write out particle trajectories to tab-separated ASCII files (one
        for each trajectory) with the field names in the file header. Each
        file is named with a basename and the index number.

        Parameters
        ----------
        filename_base : string
            The prefix for the outputted ASCII files.

        Examples
        --------
        >>> from yt.mods import *
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.write_out("orbit_trajectory")       
        """
        fields = [field for field in sorted(self.field_data.keys())]
        num_fields = len(fields)
        first_str = "# particle_time\t" + "\t".join(fields)+"\n"
        template_str = "%g\t"*num_fields+"%g\n"
        for ix in range(self.num_indices):
            outlines = [first_str]
            for it in range(self.num_steps):
                outlines.append(template_str %
                                tuple([self.times[it]]+[self[field][ix,it] for field in fields]))
            with open(filename_base + "_%d.dat" % self.indices[ix], "w") as fid:
                fid.writelines(outlines)

    @parallel_root_only
    def write_out_h5(self, filename):
        """
        Write out all the particle trajectories to a single HDF5 file
        that contains the indices, the times, and the 2D array for each
        field individually

        Parameters
        ----------

        filename : string
            The output filename for the HDF5 file

        Examples
        --------

        >>> from yt.mods import *
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.write_out_h5("orbit_trajectories")                
        """
        fid = h5py.File(filename, "w")
        fields = [field for field in sorted(self.field_data.keys())]
        fid.create_dataset("particle_indices", dtype=np.int32,
                           data=self.indices)
        fid.create_dataset("particle_time", data=self.times)
        for field in fields:
            fid.create_dataset("%s" % field, data=self[field])
        fid.close()
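The HDF5 layout produced by write_out_h5 is flat: one dataset per field plus the particle indices and times. A minimal read-back sketch with h5py (the filename follows the docstring example; the position field is only present if it was loaded):

import h5py

with h5py.File("orbit_trajectories", "r") as f:
    indices = f["particle_indices"][:]
    times = f["particle_time"][:]
    x = f["particle_position_x"][:]   # shape (num_indices, num_steps)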
Example #24
0
def load(*args, **kwargs):
    """
    This function attempts to determine the base data type of a filename or
    other set of arguments by calling
    :meth:`yt.data_objects.api.Dataset._is_valid` until it finds a
    match, at which point it returns an instance of the appropriate
    :class:`yt.data_objects.api.Dataset` subclass.
    """
    if len(args) == 0:
        try:
            import Tkinter, tkFileDialog
        except ImportError:
            raise YTOutputNotIdentified(args, kwargs)
        root = Tkinter.Tk()
        filename = tkFileDialog.askopenfilename(parent=root,
                                                title='Choose a file')
        if filename is not None:
            return load(filename)
        else:
            raise YTOutputNotIdentified(args, kwargs)
    candidates = []
    args = [
        os.path.expanduser(arg) if isinstance(arg, types.StringTypes) else arg
        for arg in args
    ]
    valid_file = []
    for argno, arg in enumerate(args):
        if isinstance(arg, types.StringTypes):
            if os.path.exists(arg):
                valid_file.append(True)
            elif arg.startswith("http"):
                valid_file.append(True)
            else:
                if os.path.exists(
                        os.path.join(ytcfg.get("yt", "test_data_dir"), arg)):
                    valid_file.append(True)
                    args[argno] = os.path.join(
                        ytcfg.get("yt", "test_data_dir"), arg)
                else:
                    valid_file.append(False)
        else:
            valid_file.append(False)
    if not any(valid_file):
        try:
            from yt.data_objects.time_series import DatasetSeries
            ts = DatasetSeries.from_filenames(*args, **kwargs)
            return ts
        except YTOutputNotIdentified:
            pass
        mylog.error("None of the arguments provided to load() is a valid file")
        mylog.error("Please check that you have used a correct path")
        raise YTOutputNotIdentified(args, kwargs)
    for n, c in output_type_registry.items():
        if n is None: continue
        if c._is_valid(*args, **kwargs): candidates.append(n)
    if len(candidates) == 1:
        return output_type_registry[candidates[0]](*args, **kwargs)
    if len(candidates) == 0:
        if ytcfg.get("yt", "enzo_db") != '' \
           and len(args) == 1 \
           and isinstance(args[0], types.StringTypes):
            erdb = EnzoRunDatabase()
            fn = erdb.find_uuid(args[0])
            n = "EnzoDataset"
            if n in output_type_registry \
               and output_type_registry[n]._is_valid(fn):
                return output_type_registry[n](fn)
        mylog.error("Couldn't figure out output type for %s", args[0])
        raise YTOutputNotIdentified(args, kwargs)
    mylog.error("Multiple output type candidates for %s:", args[0])
    for c in candidates:
        mylog.error("    Possible: %s", c)
    raise YTOutputNotIdentified(args, kwargs)
Example #25
0
    def get_time_series(self, initial_time=None, final_time=None,
                        initial_redshift=None, final_redshift=None,
                        times=None, redshifts=None, tolerance=None,
                        parallel=True, setup_function=None):

        """
        Instantiate a DatasetSeries object for a set of outputs.

        If no additional keywords given, a DatasetSeries object will be
        created with all potential datasets created by the simulation.

        Outputs can be gathered by specifying a time or redshift range
        (or a combination of time and redshift), by providing a specific
        list of times or redshifts, or by simply searching all
        subdirectories within the simulation directory.

        Parameters
        ----------
        initial_time : tuple of type (float, str)
            The earliest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (5.0, "Gyr").  If None, the initial time of the 
            simulation is used.  This can be used in combination with 
            either final_time or final_redshift.
            Default: None.
        final_time : tuple of type (float, str)
            The latest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (13.7, "Gyr"). If None, the final time of the 
            simulation is used.  This can be used in combination with either 
            initial_time or initial_redshift.
            Default: None.
        times : tuple of type (float array, str)
            A list of times for which outputs will be found and the units 
            of those values.  For example, ([0, 1, 2, 3], "s").
            Default: None.
        initial_redshift : float
            The earliest redshift for outputs to be included.  If None,
            the initial redshift of the simulation is used.  This can be
            used in combination with either final_time or
            final_redshift.
            Default: None.
        final_redshift : float
            The latest redshift for outputs to be included.  If None,
            the final redshift of the simulation is used.  This can be
            used in combination with either initial_time or
            initial_redshift.
            Default: None.
        redshifts : array_like
            A list of redshifts for which outputs will be found.
            Default: None.
        tolerance : float
            Used in combination with "times" or "redshifts" keywords,
            this is the tolerance within which outputs are accepted
            given the requested times or redshifts.  If None, the
            nearest output is always taken.
            Default: None.
        parallel : bool/int
            If True, the generated DatasetSeries will divide the work
            such that a single processor works on each dataset.  If an
            integer is supplied, the work will be divided into that
            number of jobs.
            Default: True.
        setup_function : callable, accepts a ds
            This function will be called whenever a dataset is loaded.

        Examples
        --------

        >>> import yt
        >>> gs = yt.simulation("my_simulation.par", "Gadget")
        
        >>> gs.get_time_series(initial_redshift=10, final_time=(13.7, "Gyr"))

        >>> gs.get_time_series(redshifts=[3, 2, 1, 0])

        >>> # after calling get_time_series
        >>> for ds in gs.piter():
        ...     p = ProjectionPlot(ds, "x", "density")
        ...     p.save()

        >>> # An example using the setup_function keyword
        >>> def print_time(ds):
        ...     print(ds.current_time)
        >>> gs.get_time_series(setup_function=print_time)
        >>> for ds in gs:
        ...     SlicePlot(ds, "x", "Density").save()

        """

        if (initial_redshift is not None or \
            final_redshift is not None) and \
            not self.cosmological_simulation:
            raise InvalidSimulationTimeSeries(
                "An initial or final redshift has been given for a " +
                "noncosmological simulation.")

        my_all_outputs = self.all_outputs
        if not my_all_outputs:
            DatasetSeries.__init__(self, outputs=[], parallel=parallel,
                                   unit_base=self.unit_base)
            mylog.info("0 outputs loaded into time series.")
            return

        # Apply selection criteria to the set.
        if times is not None:
            my_outputs = self._get_outputs_by_key("time", times,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        elif redshifts is not None:
            my_outputs = self._get_outputs_by_key("redshift",
                                                  redshifts, tolerance=tolerance,
                                                  outputs=my_all_outputs)

        else:
            if initial_time is not None:
                if isinstance(initial_time, float):
                    initial_time = self.quan(initial_time, "code_time")
                elif isinstance(initial_time, tuple) and len(initial_time) == 2:
                    initial_time = self.quan(*initial_time)
                elif not isinstance(initial_time, unyt_array):
                    raise RuntimeError(
                        "Error: initial_time must be given as a float or " +
                        "tuple of (value, units).")
                my_initial_time = initial_time.in_units("s")
            elif initial_redshift is not None:
                my_initial_time = self.cosmology.t_from_z(initial_redshift)
            else:
                my_initial_time = self.initial_time

            if final_time is not None:
                if isinstance(final_time, float):
                    final_time = self.quan(final_time, "code_time")
                elif isinstance(final_time, tuple) and len(final_time) == 2:
                    final_time = self.quan(*final_time)
                elif not isinstance(final_time, unyt_array):
                    raise RuntimeError(
                        "Error: final_time must be given as a float or " +
                        "tuple of (value, units).")
                my_final_time = final_time.in_units("s")
            elif final_redshift is not None:
                my_final_time = self.cosmology.t_from_z(final_redshift)
            else:
                my_final_time = self.final_time

            my_initial_time.convert_to_units("s")
            my_final_time.convert_to_units("s")
            my_times = np.array([a["time"] for a in my_all_outputs])
            my_indices = np.digitize([my_initial_time, my_final_time], my_times)
            if my_initial_time == my_times[my_indices[0] - 1]: my_indices[0] -= 1
            my_outputs = my_all_outputs[my_indices[0]:my_indices[1]]

        init_outputs = []
        for output in my_outputs:
            if os.path.exists(output["filename"]):
                init_outputs.append(output["filename"])
        if len(init_outputs) == 0 and len(my_outputs) > 0:
            mylog.warning("Could not find any datasets.  " +
                          "Check the value of OutputDir in your parameter file.")
            
        DatasetSeries.__init__(self, outputs=init_outputs, parallel=parallel,
                                setup_function=setup_function,
                                unit_base=self.unit_base)
        mylog.info("%d outputs loaded into time series.", len(init_outputs))
Example #26
0
if not os.path.exists('%s/rockstar/' % sim.path()):
    os.mkdir('%s/rockstar/' % sim.path())

#cd to rockstar/
os.chdir('rockstar/')
print('In dir: %s' % os.getcwd())
print('Starting rockstar...')

outputs = np.arange(1, sim.num_snapshots() + 1)
dirs = []
#Add the datasets
for ioutput in outputs:
    #ds = yt.load('../output_%05d/info_%05d.txt'%(ioutput, ioutput))
    ds = sim.snapshot(ioutput, module='yt').raw_snapshot()
    #assert(ds.add_particle_filter("dark_matter"))
    dirs.append(ds)

#es = yt.load('../output_*/info_*.txt')
es = DatasetSeries(dirs, setup_function=setup_ds)
#es = DatasetSeries(dirs)

readers = int(ncpu / 4.)
#Reserve one cpu for the server
writers = ncpu - readers - 1
print('Running rockstar with %i writers and %i readers' % (writers, readers))
rh = RockstarHaloFinder(es,
                        num_readers=readers,
                        num_writers=writers,
                        particle_type="dark_matter")
rh.run()
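The reader/writer split above reserves one task for the Rockstar server and gives roughly a quarter of the remainder to readers. For a hypothetical ncpu of 16:

ncpu = 16
readers = int(ncpu / 4.)        # 4 reader tasks
writers = ncpu - readers - 1    # 11 writer tasks, 1 task left for the server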