Example 1
def load_simulation(fn, simulation_type, find_outputs=False):
    """
    Load a simulation time series object of the specified simulation type.

    Parameters
    ----------
    fn : str, os.PathLike, or bytes (types supported by os.path.expanduser)
        Name of the data file or directory.

    simulation_type : str
        The type of simulation frontend to use, e.g. 'Enzo'.

    find_outputs : bool, optional
        If True, search the simulation directory for datasets rather than
        relying on the parameter file to predict them. Defaults to False.

    Raises
    ------
    FileNotFoundError
        If fn is not found.

    yt.utilities.exceptions.YTSimulationNotIdentified
        If simulation_type is unknown.
    """

    fn = str(lookup_on_disk_data(fn))

    try:
        cls = simulation_time_series_registry[simulation_type]
    except KeyError as e:
        raise YTSimulationNotIdentified(simulation_type) from e

    return cls(fn, find_outputs=find_outputs)
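
A minimal usage sketch of this function; the parameter-file path below is illustrative and assumes an Enzo simulation is available on disk:

import yt

# Build a time series covering all outputs of an Enzo simulation,
# scanning the directory for outputs (find_outputs=True).
sim = yt.load_simulation("enzo_tiny_cosmology/32Mpc_32.enzo", "Enzo", find_outputs=True)
sim.get_time_series()
for ds in sim:
    print(ds.current_time)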
Example 2
def load(fn, *args, **kwargs):
    """
    Load a Dataset or DatasetSeries object.
    The data format is automatically discovered, and the exact return type is the
    corresponding subclass of :class:`yt.data_objects.static_output.Dataset`.
    A :class:`yt.data_objects.time_series.DatasetSeries` is created if the first
    argument is a pattern.

    Parameters
    ----------
    fn : str, os.PathLike, or bytes (types supported by os.path.expanduser)
        A path to the data location. This can be a file name, directory name, a glob
        pattern, or a URL (for data types that support it).

    Additional arguments, if any, are passed down to the return class.

    Returns
    -------
    :class:`yt.data_objects.static_output.Dataset` object
        If fn is a single path, create a Dataset from the appropriate subclass.

    :class:`yt.data_objects.time_series.DatasetSeries`
        If fn is a glob pattern (i.e. containing wildcards '[]?!*'), create a series.

    Raises
    ------
    FileNotFoundError
        If fn does not match any existing file or directory.

    yt.utilities.exceptions.YTUnidentifiedDataType
        If fn matches existing files or directories with undetermined format.

    yt.utilities.exceptions.YTAmbiguousDataType
        If the data format matches more than one class with similar specialization levels.
    """
    fn = os.path.expanduser(fn)

    if any(wildcard in fn for wildcard in "[]?!*"):
        from yt.data_objects.time_series import DatasetSeries

        return DatasetSeries(fn, *args, **kwargs)

    # This will raise FileNotFoundError if the path isn't matched
    # either in the current dir or yt.config.ytcfg['data_dir_directory']
    if not fn.startswith("http"):
        fn = str(lookup_on_disk_data(fn))

    candidates = []
    for cls in output_type_registry.values():
        if cls._is_valid(fn, *args, **kwargs):
            candidates.append(cls)

    # Find only the lowest subclasses, i.e. most specialised front ends
    candidates = find_lowest_subclasses(candidates)

    if len(candidates) == 1:
        return candidates[0](fn, *args, **kwargs)

    if len(candidates) > 1:
        raise YTAmbiguousDataType(fn, candidates)

    raise YTUnidentifiedDataType(fn, *args, **kwargs)
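
For reference, typical calls look like the following; the file paths are illustrative sample-data paths:

import yt

# A single output file resolves to the matching Dataset subclass.
ds = yt.load("IsolatedGalaxy/galaxy0030/galaxy0030")

# A glob pattern resolves to a DatasetSeries spanning all matching outputs.
ts = yt.load("enzo_tiny_cosmology/DD????/DD????")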
Example 3
def load_sample(fn, progressbar: bool = True, timeout=None, **kwargs):
    """
    Load sample data with yt. This is a thin wrapper around `yt.load` that also
    fetches the data with pooch when needed.

    The data registry table can be retrieved and visualized using
    `yt.sample_data.api.get_data_registry_table`.

    This function requires pandas and pooch to be installed.

    Parameters
    ----------
    fn : str
        The `filename` of the dataset to load, as defined in the data registry
        table.

    progressbar : bool
        Display a progress bar (requires tqdm).

    timeout : float or int, optional
        Maximum waiting time, in seconds, after which the download is aborted.
        `None` means "no limit". This parameter is passed down to
        requests.get via pooch.HTTPDownloader.

    Any additional keyword argument is passed down to `yt.load`.
    Note that in case of a collision with keyword arguments predefined in
    the data registry, the ones passed to this function take priority.
    """

    from yt.sample_data.api import (
        _download_sample_data_file,
        _get_test_data_dir_path,
        get_data_registry_table,
    )

    pooch_logger = pooch.utils.get_logger()

    topdir, _, specific_file = str(fn).partition(os.path.sep)

    registry_table = get_data_registry_table()
    # PR 3089
    # note: in the future the registry table should be reindexed
    # so that the following line can be replaced with
    #
    # specs = registry_table.loc[fn]
    #
    # however we don't want to do it right now because the "filename" column is
    # currently incomplete

    try:
        specs = registry_table.query(f"`filename` == '{topdir}'").iloc[0]
    except IndexError as err:
        raise KeyError(f"Could not find '{fn}' in the registry.") from err

    if not specs["load_name"]:
        raise ValueError(
            "Registry appears to be corrupted: could not find a 'load_name' entry for this dataset."
        )

    kwargs = {**specs["load_kwargs"], **kwargs}

    try:
        data_dir = lookup_on_disk_data(fn)
    except FileNotFoundError:
        mylog.info("'%s' is not available locally. Looking up online.", fn)
    else:
        # if the data is already available locally, `load_sample`
        # only acts as a thin wrapper around `load`
        loadable_path = data_dir.joinpath(specs["load_name"], specific_file)
        mylog.info("Sample dataset found in '%s'", data_dir)
        if timeout is not None:
            mylog.info("Ignoring the `timeout` keyword argument received.")
        return load(loadable_path, **kwargs)

    try:
        save_dir = _get_test_data_dir_path()
    except FileNotFoundError:
        mylog.warning(
            "yt test data directory is not properly set up. "
            "Data will be saved to the current work directory instead.")
        save_dir = Path.cwd()

    # effectively silence the pooch's logger and create our own log instead
    pooch_logger.setLevel(100)
    mylog.info("Downloading from %s", specs["url"])

    # downloading via a pooch.Pooch instance behind the scenes
    filename = urlsplit(specs["url"]).path.split("/")[-1]

    tmp_file = _download_sample_data_file(filename,
                                          progressbar=progressbar,
                                          timeout=timeout)

    # pooch has functionalities to unpack downloaded archive files,
    # but it needs to be told in advance that we are downloading a tarball.
    # Since that information is not necessarily trivial to guess from the filename,
    # we rely on the standard library to perform a conditional unpacking instead.
    if tarfile.is_tarfile(tmp_file):
        mylog.info("Untaring downloaded file to '%s'", save_dir)
        with tarfile.open(tmp_file) as fh:
            fh.extractall(save_dir)
        os.remove(tmp_file)
    else:
        os.replace(tmp_file, save_dir)

    loadable_path = save_dir.joinpath(fn, specs["load_name"], specific_file)

    if specific_file and not loadable_path.exists():
        raise ValueError(f"Could not find file '{specific_file}'.")

    return load(loadable_path, **kwargs)
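
A short usage sketch; "IsolatedGalaxy" is assumed to be one of the names listed in the sample-data registry:

import yt

# Download the sample dataset on first use (cached locally afterwards), then load it.
ds = yt.load_sample("IsolatedGalaxy", progressbar=True)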