super(GFDL_data_CMIP6DataSourceAttributes, self).__post_init__(log, model, experiment) class Gfdldatacmip6DataManager(data_sources.CMIP6ExperimentSelectionMixin, GFDL_GCP_FileDataSourceBase): """DataSource for accessing pre-publication CMIP6 data on /data_cmip6. """ _FileRegexClass = cmip6.CMIP6_DRSPath _DirectoryRegex = cmip6.drs_directory_regex _AttributesClass = GFDL_data_CMIP6DataSourceAttributes _fetch_method = "gcp" # RegexPattern that matches any string (path) that doesn't end with ".nc". _ignore_non_nc_regex = util.RegexPattern(r".*(?<!\.nc)") # match any path that ends in .tile<digit>.nc (the pattern has no lookbehind, # so it selects the tiled files themselves) _ignore_tiles_regex = util.RegexPattern(r".*\.tile\d\.nc$") # match any paths corresponding to time average data (/av/), since currently # we only deal with timeseries data (/ts/) _ignore_time_avg_regex = util.RegexPattern(r"/?([a-zA-Z0-9_-]+)/av/\S*") # RegexPattern matching any of the above -- description of files that are OK # to silently ignore during /pp/ directory crawl pp_ignore_regex = util.ChainedRegexPattern(_ignore_time_avg_regex, _ignore_tiles_regex, _ignore_non_nc_regex) # can't combine these with the path regexes (below) since static dir regex should # only be used with static files _pp_dir_regex = util.RegexPattern(r"""
else: raise ValueError("Malformed data {} {}".format(self.quantity, self.unit)) __str__ = format def __copy__(self): return self.__class__(self.format()) def __deepcopy__(self, memo): return self.__class__(self.format()) # =========================================================================== variant_label_regex = util.RegexPattern(r""" (r(?P<realization_index>\d+))? # (optional) int prefixed with 'r' (i(?P<initialization_index>\d+))? # (optional) int prefixed with 'i' (p(?P<physics_index>\d+))? # (optional) int prefixed with 'p' (f(?P<forcing_index>\d+))? # (optional) int prefixed with 'f' """, input_field="variant_label" ) @util.regex_dataclass(variant_label_regex) @util.mdtf_dataclass class CMIP6_VariantLabel(): """Dataclass which represents and parses the CMIP6 DRS variant label identifier string. Reference: `<http://goo.gl/v1drZl>`__, note 8 on page 9. """ variant_label: str = util.MANDATORY realization_index: int = None initialization_index: int = None physics_index: int = None forcing_index: int = None
the user via ``--data_manager``; see :doc:`ref_data_sources` and :doc:`fmwk_datasources`. """ import os import collections import dataclasses from src import util, core, diagnostic, xr_parser, preprocessor, cmip6 from src import data_manager as dm import pandas as pd import logging _log = logging.getLogger(__name__) # RegexPattern that matches any string (path) that doesn't end with ".nc". ignore_non_nc_regex = util.RegexPattern(r".*(?<!\.nc)") sample_data_regex = util.RegexPattern(r""" (?P<sample_dataset>\S+)/ # first directory: model name (?P<frequency>\w+)/ # subdirectory: data frequency # file name = model name + variable name + frequency (?P=sample_dataset)\.(?P<variable>\w+)\.(?P=frequency)\.nc """, input_field="remote_path", match_error_filter=ignore_non_nc_regex) @util.regex_dataclass(sample_data_regex) class SampleDataFile(): """Dataclass describing catalog entries for sample model data files. """