Ejemplo n.º 1
0
def test_get_project_base_dir():
    """Check the base directory returned for known projects and the error for an unknown one."""
    assert get_project_base_dir("cmip5") == "/badc/cmip5/data/cmip5"
    assert (
        get_project_base_dir("c3s-cordex")
        == "/gws/nopw/j04/cp4cds1_vol1/data/c3s-cordex"
    )

    # An unrecognised project name must raise with this exact message.
    with pytest.raises(Exception) as raised:
        get_project_base_dir("test")
    assert str(raised.value) == "The project supplied is not known."
Ejemplo n.º 2
0
def populate_dc_store():
    """
    Scan every dataset listed in `ds_ids` for the "cmip5" project.

    `scan.get_dc_store` is replaced with a Mock that returns `char_store`
    before any scanning takes place.
    """
    scan.get_dc_store = Mock(return_value=char_store)

    project = "cmip5"
    base_dir = get_project_base_dir(project)
    resolved = get_dataset_paths(project, ds_ids=ds_ids, paths=base_dir)

    for dataset_id, dataset_path in resolved.items():
        scan_dataset(project, dataset_id, dataset_path, "full", "ceda")
Ejemplo n.º 3
0
Archivo: scan.py Proyecto: roocs/dachar
def _get_ds_paths_from_paths(paths, project):
    """
    Return an OrderedDict of {<ds_id>: <ds_path>} found under the paths provided
    as `paths` (a sequence of directory/file paths).

    Each path is split into components relative to the project base directory.
    The components are matched positionally against the project's `facet_rule`;
    facets not present in the path become "*" wildcards (an unset "version"
    becomes "latest") and the resulting pattern is globbed under the base
    directory. Components beyond the length of `facet_rule` are ignored.

    :param paths: (sequence) directory/file paths; a single string is treated
                  as a one-item sequence
    :param project: top-level project, e.g. "cmip5", "cmip6" or "cordex" (case-insensitive)
    :return: OrderedDict of {<ds_id>: <ds_path>}
    :raises Exception: if any path is neither the project base directory nor
                       located beneath it
    """
    base_dir = get_project_base_dir(project)

    # A bare string would otherwise be iterated character by character, and a
    # one-shot iterator would be exhausted by the validation pass below.
    paths = [paths] if isinstance(paths, str) else list(paths)

    # Compare against "<base_dir>/" so that a sibling directory which merely
    # shares the prefix (e.g. "<base_dir>-old") is rejected.
    base_prefix = base_dir if base_dir.endswith("/") else base_dir + "/"

    # Check paths first
    bad_paths = [
        pth for pth in paths
        if pth != base_dir and not pth.startswith(base_prefix)
    ]

    if bad_paths:
        raise Exception(f"Invalid paths provided: {bad_paths}")

    ds_paths = collections.OrderedDict()

    if not paths:
        return ds_paths

    # The facet order depends only on the project, so look it up once.
    facet_order = CONFIG[f'project:{project}']['facet_rule']

    for pth in paths:

        LOGGER.info(f"Searching for datasets under: {pth}")

        # Strip only the leading base directory (str.replace would also remove
        # any later occurrence) and drop empty components. An empty first
        # facet would make the pattern start with "/", and os.path.join then
        # discards base_dir, so the glob would run from the filesystem root.
        relative_path = pth[len(base_dir):]
        facets_in_path = [part for part in relative_path.split("/") if part]

        # zip() stops at the shorter input: surplus path components are
        # ignored and facets missing from the path stay unset.
        facets = dict(zip(facet_order, facets_in_path))

        # Fix facet version if not set
        if not facets.get("version"):
            facets["version"] = "latest"

        facets_as_path = "/".join([facets.get(_, "*") for _ in facet_order])

        # Remove anything matching "files"
        if "/files" in facets_as_path:
            continue

        # TODO: This repeats the glob logic in get_dataset_paths. Suggest we create
        #      a module/class to manage all mapping of different args to resolve
        #      to ds_paths dictionary, later.
        pattern = os.path.join(base_dir, facets_as_path)
        LOGGER.info(f"Finding dataset paths for pattern: {pattern}")

        for ds_path in glob.glob(pattern):
            ds_id = switch_ds.switch_ds(project, ds_path)
            ds_paths[ds_id] = ds_path

    return ds_paths
Ejemplo n.º 4
0
    def _load_ids(self):
        """
        Collect the candidate ds_ids that match `self.sample_id`.

        The dotted sample id is turned into a path pattern under the project
        base directory and globbed. For each match, the last 11 path
        components are joined with "." to form an id. The list is stored on
        `self._sample` and returned.
        """
        root = get_project_base_dir(self.project)
        pattern = os.path.join(root, self.sample_id.replace(".", "/"))

        self._sample = []
        self._sample.extend(
            ".".join(match.split("/")[-11:]) for match in glob.glob(pattern)
        )

        return self._sample
Ejemplo n.º 5
0
def _consolidate_dset(dset):
    if dset.startswith('https'):
        raise Exception('This format is not supported yet')
    elif os.path.isfile(dset) or dset.endswith('.nc'):
        return dset
    elif os.path.isdir(dset):
        return os.path.join(dset, '*.nc')
    elif dset.count('.') > 6:
        project = get_project_name(dset)
        base_dir = get_project_base_dir(project)
        return base_dir.rstrip("/") + "/" + dset.replace(".", "/") + "/*.nc"
    else:
        raise Exception(f'The format of {dset} is not known.')
Ejemplo n.º 6
0
Archivo: scan.py Proyecto: roocs/dachar
def get_dataset_paths(project,
                      ds_ids=None,
                      paths=None,
                      facets=None,
                      exclude=None):
    """
    Resolve the input arguments to an Ordered Dictionary of {DSID: directory} items.

    The first non-empty argument wins, in the order `ds_ids`, `facets`, `paths`;
    the remaining ones are ignored.

    :param project: top-level project, e.g. "cmip5", "cmip6" or "cordex" (case-insensitive)
    :param ds_ids: sequence of dataset identifiers (DSIDs), OR None. Empty entries are skipped.
    :param paths: sequence of file paths to scan for NetCDF files under, OR None.
    :param facets: dictionary of facet values to limit the search, OR None.
    :param exclude: list of regular expressions to exclude in file paths, OR None.
                    Not used by this function at present.

    :return: An Ordered Dictionary of {dsid: directory}
    :raises NotImplementedError: if none of `ds_ids`, `facets` or `paths` is given
    """
    root = get_project_base_dir(project)

    # Explicit dataset ids take precedence over everything else.
    if ds_ids:
        return collections.OrderedDict(
            (dsid, switch_ds.switch_ds(project, dsid))
            for dsid in ds_ids
            if dsid
        )

    # Otherwise build a glob pattern from the facets, wildcarding the unset ones.
    if facets:
        rule = CONFIG[f'project:{project}']['facet_rule']
        pattern = os.path.join(
            root, "/".join(facets.get(name, "*") for name in rule))
        LOGGER.info(f"Finding dataset paths for pattern: {pattern}")

        return collections.OrderedDict(
            (switch_ds.switch_ds(project, found), found)
            for found in glob.glob(pattern)
        )

    if paths:
        return _get_ds_paths_from_paths(paths, project)

    raise NotImplementedError(
        'Code currently breaks if not using "ds_ids" argument.')
Ejemplo n.º 7
0
def switch_ds(project, ds):
    """
    Convert a dataset path to a dataset ID (DSID), or a DSID to a dataset path.

    Input starting with "/" is treated as a path: the project base directory
    is removed and the remaining components are joined with ".". Any other
    input is treated as a DSID: its dots become path separators under the
    project base directory.

    :param project: top-level project
    :param ds: either dataset path or dataset ID (DSID)
    :return: either dataset path or dataset ID (DSID) - switched from the input.
    """
    root = get_project_base_dir(project)

    if not ds.startswith("/"):
        # DSID -> path
        return os.path.join(root, ds.replace(".", "/"))

    # path -> DSID
    relative = ds.replace(root, "").strip("/")
    return relative.replace("/", ".")
Ejemplo n.º 8
0
import glob
import json
import os
import subprocess
import sys

import numpy as np
import pytest
import xarray as xr
from roocs_utils.project_utils import get_project_base_dir

from dachar.scan import scan
from dachar.utils import character


# Base directory for the "cmip5" project, resolved once when this module is imported.
base_dir = get_project_base_dir("cmip5")

# def test_parser():
#     sys.argv = "scan.py -m MOHC/HadGEM2-ES -exp historical -e r1i1p1 -v rh".split()
#     args = scan.arg_parse()
#     for model in args.model:
#         assert model == "MOHC/HadGEM2-ES"
#     for experiment in args.experiment:
#         assert experiment == "historical"
#     for ensemble in args.ensemble:
#         assert ensemble == "r1i1p1"
#     for variable in args.var_id:
#         assert variable == "rh"
#
#
# def test_get_files():