Exemple #1
0
        super(GFDL_data_CMIP6DataSourceAttributes,
              self).__post_init__(log, model, experiment)


class Gfdldatacmip6DataManager(
    data_sources.CMIP6ExperimentSelectionMixin,
    GFDL_GCP_FileDataSourceBase
):
    """DataSource for accessing pre-publication CMIP6 data on /data_cmip6.

    The class body defines no methods of its own: it only sets the class-level
    configuration attributes below, and everything else is inherited from the
    two base classes.
    """
    # Path-parsing configuration, taken from the project's ``cmip6`` module.
    # NOTE(review): presumably consumed by the base class when crawling the
    # data directory -- confirm against GFDL_GCP_FileDataSourceBase.
    _FileRegexClass = cmip6.CMIP6_DRSPath
    _DirectoryRegex = cmip6.drs_directory_regex

    # Attributes dataclass associated with this data source.
    _AttributesClass = GFDL_data_CMIP6DataSourceAttributes

    # Identifier of the file transfer method ("gcp").
    _fetch_method = "gcp"


# RegexPattern that matches any string (path) that doesn't end with ".nc"
# (the pattern uses a negative lookbehind on the ".nc" suffix).
_ignore_non_nc_regex = util.RegexPattern(r".*(?<!\.nc)")
# RegexPattern that matches paths ending in ".tile#.nc", where # is a single
# digit. This is a positive match (no lookbehind involved): files of this form
# are among those to be ignored.
_ignore_tiles_regex = util.RegexPattern(r".*\.tile\d\.nc$")
# match any paths corresponding to time average data (/av/), since currently
# we only deal with timeseries data (/ts/)
_ignore_time_avg_regex = util.RegexPattern(r"/?([a-zA-Z0-9_-]+)/av/\S*")
# RegexPattern matching any of the above -- description of files that are OK
# to silently ignore during /pp/ directory crawl
pp_ignore_regex = util.ChainedRegexPattern(_ignore_time_avg_regex,
                                           _ignore_tiles_regex,
                                           _ignore_non_nc_regex)

# can't combine these with the path regexes (below) since static dir regex should
# only be used with static files
_pp_dir_regex = util.RegexPattern(r"""
Exemple #2
0
        else: 
            raise ValueError("Malformed data {} {}".format(self.quantity, self.unit))
    __str__ = format

    def __copy__(self):
        """Return a new instance of this object's class, constructed from the
        string produced by ``self.format()``.
        """
        cls = self.__class__
        return cls(self.format())

    def __deepcopy__(self, memo):
        """Return a new instance of this object's class, constructed from the
        string produced by ``self.format()``.

        ``memo`` is accepted because ``copy.deepcopy`` passes it, but it is
        neither read nor updated here.
        """
        cls = self.__class__
        return cls(self.format())

# ===========================================================================

# Pattern for a CMIP6 variant label of the form "r<N>i<N>p<N>f<N>". Each of the
# four components is optional and, when present, is captured as a named group
# holding the digits that follow the prefix letter. The pattern is applied to
# the "variant_label" input field.
# NOTE(review): the pattern text contains layout whitespace and inline "#"
# comments, so it presumably relies on util.RegexPattern compiling in verbose
# mode -- confirm.
variant_label_regex = util.RegexPattern(r"""
        (r(?P<realization_index>\d+))?    # (optional) int prefixed with 'r'
        (i(?P<initialization_index>\d+))? # (optional) int prefixed with 'i'
        (p(?P<physics_index>\d+))?        # (optional) int prefixed with 'p'
        (f(?P<forcing_index>\d+))?        # (optional) int prefixed with 'f'
    """,
    input_field="variant_label"
)
@util.regex_dataclass(variant_label_regex)
@util.mdtf_dataclass
class CMIP6_VariantLabel():
    """Dataclass which represents and parses the CMIP6 DRS variant label
    identifier string (of the form ``r<N>i<N>p<N>f<N>``).

    The class is associated with ``variant_label_regex`` through the
    ``util.regex_dataclass`` decorator; the four index fields carry the same
    names as that pattern's named groups.

    Reference: `<http://goo.gl/v1drZl>`__, note 8 on page 9.
    """
    # The full variant label string (the regex's input field).
    # NOTE(review): util.MANDATORY presumably marks the field as required --
    # confirm against util.mdtf_dataclass.
    variant_label: str = util.MANDATORY
    # Individual indices from the label. Each defaults to None; every component
    # is optional in the regex.
    realization_index: int = None
    initialization_index: int = None
    physics_index: int = None
    forcing_index: int = None
the user via ``--data_manager``; see :doc:`ref_data_sources` and
:doc:`fmwk_datasources`.
"""
import os
import collections
import dataclasses
from src import util, core, diagnostic, xr_parser, preprocessor, cmip6
from src import data_manager as dm
import pandas as pd

import logging

_log = logging.getLogger(__name__)

# RegexPattern that matches any string (path) that doesn't end with ".nc"
# (negative lookbehind on the ".nc" suffix).
ignore_non_nc_regex = util.RegexPattern(r".*(?<!\.nc)")

# Pattern for sample data paths of the form
# "<sample_dataset>/<frequency>/<sample_dataset>.<variable>.<frequency>.nc",
# applied to the "remote_path" input field. The back-references require the
# file name to repeat the directory's dataset name and frequency.
# ignore_non_nc_regex is supplied as the match_error_filter.
sample_data_regex = util.RegexPattern(
    r"""
        (?P<sample_dataset>\S+)/    # first directory: model name
        (?P<frequency>\w+)/         # subdirectory: data frequency
        # file name = model name + variable name + frequency
        (?P=sample_dataset)\.(?P<variable>\w+)\.(?P=frequency)\.nc
    """,
    input_field="remote_path",
    match_error_filter=ignore_non_nc_regex,
)


@util.regex_dataclass(sample_data_regex)
class SampleDataFile():
    """Dataclass describing catalog entries for sample model data files.
    """