def parse_DRS_filename(file_):
    """Parse a DRS filename, using the regex defined above.

    Reference: `<http://goo.gl/v1drZl>`__, page 14-15.

    Args:
        file_ (str): filename to be parsed.

    Returns:
        dict of file attributes determined by the DRS naming convention.

    Raises:
        ValueError: if the filename doesn't match the regex, or if the
            parsed date range disagrees with the parsed date frequency.
    """
    match = re.match(drs_filename_regex, file_)
    if not match:
        raise ValueError("Can't parse filename {}.".format(file_))
    attrs = match.groupdict()
    if attrs['start_date'] is None or attrs['end_date'] is None:
        # Static/fx-frequency data: no dates in the filename, so fall back
        # to the special placeholder date values.
        attrs['start_date'] = datelabel.FXDateMin
        attrs['end_date'] = datelabel.FXDateMax
        attrs['date_range'] = datelabel.FXDateRange
    else:
        attrs['start_date'] = datelabel.Date(attrs['start_date'])
        attrs['end_date'] = datelabel.Date(attrs['end_date'])
        attrs['date_range'] = datelabel.DateRange(
            attrs['start_date'], attrs['end_date'])
    attrs.update(parse_mip_table_id(attrs['table_id']))
    # Consistency check: an fx (static) frequency must pair with a static
    # date range, and vice versa.
    if attrs['date_range'].is_static != attrs['date_freq'].is_static:
        raise ValueError(
            "Can't parse date range in filename {}.".format(file_))
    return attrs
def test_all_attrs(self):
    """Verify every attribute parsed from a complete CMIP6 DRS filename."""
    fname = 'prw_Amon_GFDL-ESM4_historical_r1i1p1f1_gr1_195001-201412.nc'
    parsed = cmip6.parse_DRS_filename(fname)
    expected = {
        'variable_id': 'prw',
        'table_id': 'Amon',
        'source_id': 'GFDL-ESM4',
        'experiment_id': 'historical',
        'realization_code': 'r1i1p1f1',
        'grid_label': 'gr1',
        'start_date': dl.Date(1950, 1),
        'end_date': dl.Date(2014, 12),
        'date_range': dl.DateRange('195001-201412')
    }
    for key, want in expected.items():
        self.assertEqual(parsed[key], want)
def __init__(self, case_dict, DateFreqMixin=None):
    """Set up a model run ("case") from its settings dict.

    Args:
        case_dict (dict): case settings. Reads 'CASENAME', 'model',
            'FIRSTYR', 'LASTYR' and 'pod_list'; optionally 'convention'
            (default 'CF') and 'data_freq'.
        DateFreqMixin: optional class used in place of
            datelabel.DateFrequency to parse date frequency strings.

    Raises:
        Exception: propagated from nc_check_environ() if the netcdf
            manipulation dependencies aren't available.
    """
    if not DateFreqMixin:
        self.DateFreq = datelabel.DateFrequency
    else:
        self.DateFreq = DateFreqMixin
    self.case_name = case_dict['CASENAME']
    self.model_name = case_dict['model']
    self.firstyr = datelabel.Date(case_dict['FIRSTYR'])
    self.lastyr = datelabel.Date(case_dict['LASTYR'])
    self.date_range = datelabel.DateRange(self.firstyr, self.lastyr)
    self.convention = case_dict.get('convention', 'CF')
    if 'data_freq' in case_dict:
        self.data_freq = self.DateFreq(case_dict['data_freq'])
    else:
        self.data_freq = None
    self.pod_list = case_dict['pod_list']
    self.pods = []

    config = util_mdtf.ConfigManager()
    self.envvars = config.global_envvars.copy()  # gets appended to
    # assign explicitly else linter complains
    self.dry_run = config.config.dry_run
    self.file_transfer_timeout = config.config.file_transfer_timeout
    self.make_variab_tar = config.config.make_variab_tar
    self.keep_temp = config.config.keep_temp
    self.overwrite = config.config.overwrite
    self.file_overwrite = self.overwrite  # overwrite config and .tar

    d = config.paths.model_paths(case_dict, overwrite=self.overwrite)
    self.code_root = config.paths.CODE_ROOT
    self.MODEL_DATA_DIR = d.MODEL_DATA_DIR
    self.MODEL_WK_DIR = d.MODEL_WK_DIR
    self.MODEL_OUT_DIR = d.MODEL_OUT_DIR
    self.TEMP_HTML = os.path.join(self.MODEL_WK_DIR, 'pod_output_temp.html')

    # dynamic inheritance to add netcdf manipulation functions
    # source: https://stackoverflow.com/a/8545134
    # mixin = config.config.get(netcdf_helper, 'NcoNetcdfHelper')
    # hardwire now, since NCO is all that's implemented
    mixin = getattr(netcdf_helper, 'NcoNetcdfHelper')
    self.__class__ = type(
        self.__class__.__name__, (self.__class__, mixin), {})
    # NOTE(review): the original wrapped this call in a no-op
    # ``try/except Exception: raise`` which re-raised unchanged; the
    # handler was dead code and has been removed. Behavior is identical.
    self.nc_check_environ()  # make sure we have dependencies
def test_ts_parse(self):
    """Verify parsing of a GFDL pp timeseries path into dataset attributes."""
    manager = _DummyGfdlppDataManager()
    dataset = manager.parse_relative_path(
        'atmos_cmip/ts/daily/5yr',
        'atmos_cmip.20100101-20141231.rsdscsdiff.nc')
    checks = [
        (dataset.component, 'atmos_cmip'),
        (dataset.date_freq, dt.DateFrequency('day')),
        (dataset.chunk_freq, dt.DateFrequency(5, 'yr')),
        (dataset.start_date, dt.Date(2010, 1, 1)),
        (dataset.end_date, dt.Date(2014, 12, 31)),
        (dataset.name_in_model, 'rsdscsdiff'),
        (dataset._remote_data,
         '/pp/atmos_cmip/ts/daily/5yr/atmos_cmip.20100101-20141231.rsdscsdiff.nc'),
        (dataset.date_range, dt.DateRange('20100101-20141231'))
    ]
    for actual, want in checks:
        self.assertEqual(actual, want)
def _decide_allowed_components(self):
    """Pick a table_id, grid_label and version_date for each data_key.

    Returns:
        dict mapping each entry of self.data_files to an UndecidedKey
        holding the chosen (table_id, grid_label, version_date).
    """
    # Smallest set of tables covering all requested files.
    tables = choose.minimum_cover(
        self.data_files, attrgetter('table_id'),
        self._cmip6_table_tiebreaker)
    pod_dkeys = list(self.data_pods.inverse().values())
    # Prefer a single grid label shared across each pod's data keys.
    grid_lbl = choose.all_same_if_possible(
        self.data_files, pod_dkeys, attrgetter('grid_label'),
        self._cmip6_grid_tiebreaker)
    # All files must agree on a version date; tiebreak by taking the latest.
    version_date = choose.require_all_same(
        self.data_files, attrgetter('version_date'),
        lambda dates: str(max(datelabel.Date(d) for d in dates)))
    return {
        data_key: self.UndecidedKey(
            table_id=str(tables[data_key]),
            grid_label=grid_lbl[data_key],
            version_date=version_date[data_key])
        for data_key in self.data_files
    }
def parse_DRS_directory(dir_):
    """Parse a DRS directory path, using the regex defined above.

    Reference: `<http://goo.gl/v1drZl>`__, page 17.

    .. warning::
       This regex will fail on paths involving subexperiments.

    Args:
        dir_ (str): directory path to be parsed.

    Returns:
        dict of directory attributes determined by the DRS naming convention.

    Raises:
        ValueError: if the path doesn't match the regex.
    """
    match = re.match(drs_directory_regex, dir_)
    if match is None:
        raise ValueError("Can't parse dir {}.".format(dir_))
    attrs = match.groupdict()
    attrs['version_date'] = datelabel.Date(attrs['version_date'])
    attrs.update(parse_mip_table_id(attrs['table_id']))
    return attrs