def __init__(self, unittest=False):
    """Load the CMIP6 controlled-vocabulary (CV) table and pre-process it."""
    if unittest:
        # Placeholder path: tests mock out the util.read_json call below and
        # supply their own translation table, so this value is never opened.
        file_ = 'dummy_filename'
    else:
        config = util_mdtf.ConfigManager()
        file_ = os.path.join(
            config.paths.CODE_ROOT, 'src', 'cmip6-cmor-tables',
            'Tables', 'CMIP6_CV.json'
        )
    self._contents = util.read_json(file_)['CV']
    # remove unecessary information
    unused_keys = (
        'product', 'version_metadata', 'required_global_attributes',
        'further_info_url', 'Conventions', 'license'
    )
    for key in unused_keys:
        del self._contents[key]
    # munge table_ids: replace the raw list of IDs with {id: parsed form}
    self._contents['table_id'] = {
        tbl: parse_mip_table_id(tbl) for tbl in self._contents['table_id']
    }
    self.cv = dict()
    self._lookups = dict()
def __init__(self, pod_name, verbose=0):
    """POD initializer. Given a POD name, we attempt to read a settings.json
    file in a subdirectory of ``/diagnostics`` by that name and parse the
    contents.

    Args:
        pod_name (:py:obj:`str`): Name of the POD to initialize.
        verbose (:py:obj:`int`, optional): Logging verbosity level. Default 0.
    """
    config = util_mdtf.ConfigManager()
    assert pod_name in config.pods
    # Define attributes manually so the linter doesn't complain; the
    # remaining attributes are set in _parse_pod_settings below.
    self.driver = ""
    self.program = ""
    self.POD_CODE_DIR = ""
    self.POD_OBS_DATA = ""
    self.POD_WK_DIR = ""
    self.POD_OUT_DIR = ""
    self.TEMP_HTML = ""
    self.pod_env_vars = dict()
    self.skipped = None

    self.name = pod_name
    self.code_root = config.paths.CODE_ROOT
    self.dry_run = config.config.get('dry_run', False)
    pod_config = config.pods[pod_name]
    self.__dict__.update(self._parse_pod_settings(pod_config['settings']))
    self.varlist = self._parse_pod_varlist(pod_config['varlist'])
def __init__(self, case_dict, DateFreqMixin=None):
    """Initializer for data managers reading from the GFDL archive.

    Args:
        case_dict: Dict of per-case settings; must contain 'CASE_ROOT_DIR'.
        DateFreqMixin: Optional DateFrequency class override, passed through
            to the parent class initializer.

    Raises:
        DataAccessError: If CASE_ROOT_DIR is not an accessible directory.
    """
    # load required modules
    modMgr = ModuleManager()
    modMgr.load('gcp')  # should refactor

    config = util_mdtf.ConfigManager()
    config.config.netcdf_helper = 'NcoNetcdfHelper'  # HACK for now
    super(GfdlarchiveDataManager, self).__init__(case_dict, DateFreqMixin)

    assert ('CASE_ROOT_DIR' in case_dict)
    if not os.path.isdir(case_dict['CASE_ROOT_DIR']):
        raise DataAccessError(
            None,
            "Can't access CASE_ROOT_DIR = '{}'".format(
                case_dict['CASE_ROOT_DIR']))
    self.root_dir = case_dict['CASE_ROOT_DIR']
    self.tape_filesystem = is_on_tape_filesystem(self.root_dir)

    self.frepp_mode = config.config.get('frepp', False)
    if self.frepp_mode:
        self.overwrite = True
        # flag to not overwrite config and .tar: want overwrite for frepp
        self.file_overwrite = True
        # if overwrite=False, WK_DIR & OUT_DIR will have been set to a
        # unique name in parent's init. Set it back so it will be overwritten.
        # FIX: was config.paths.modelPaths(...); every other call site uses
        # the snake_case name model_paths (see DataManager.__init__ and the
        # path-manager tests), so the camelCase call would fail at runtime.
        d = config.paths.model_paths(self, overwrite=True)
        self.MODEL_WK_DIR = d.MODEL_WK_DIR
        self.MODEL_OUT_DIR = d.MODEL_OUT_DIR
def test_pathmgr_model(self):
    """model_paths() fills in the model data and working directories."""
    config = util_mdtf.ConfigManager()
    case = DataManager(self.case_dict)
    paths = config.paths.model_paths(case)
    expected = {
        'MODEL_DATA_DIR': 'TEST_MODEL_DATA_ROOT/A',
        'MODEL_WK_DIR': 'TEST_WORKING_DIR/MDTF_A_1900_2100',
    }
    for key, value in expected.items():
        self.assertEqual(paths[key], value)
def cleanup_tempdirs(self, signum=None, frame=None):
    """Delete temp directories unless the user asked to keep them.

    Also usable as a signal handler: signum/frame are logged if provided.
    """
    util.signal_logger(self.__class__.__name__, signum, frame)
    config = util_mdtf.ConfigManager()
    tmpdirs = util_mdtf.TempDirManager()
    keep_temp = config.config.get('keep_temp', False)
    if not keep_temp:
        tmpdirs.cleanup()
def tearDown(self):
    """Finalize output: render HTML, back up config, optionally tar, copy out."""
    # TODO: handle OSErrors in all of these
    config = util_mdtf.ConfigManager()
    self._make_html()
    self._backup_config_file(config)
    if self.make_variab_tar:
        self._make_tar_file(config.paths.OUTPUT_DIR)
    self._copy_to_output()
def test_convert_pod_figures(self, mock_subprocess, mock_glob):
    """Figure conversion munges source/destination filenames correctly."""
    # assert we munged filenames correctly
    config = util_mdtf.ConfigManager(unittest=True)
    pod = Diagnostic('DUMMY_POD')
    pod.POD_WK_DIR = 'A'
    pod._convert_pod_figures(config)
    # FIX: original asserted on the undefined name `mock_system`, which is a
    # guaranteed NameError; assert on the mock actually injected by the
    # decorator instead. NOTE(review): the expected 'convert' command looks
    # stale relative to the current gs-based implementation — confirm.
    mock_subprocess.assert_has_calls(
        [mock.call('convert -C A/model/PS/B.ps A/model/B.png')])
def test_pathmgr_pod(self):
    """pod_paths() fills in the POD code, obs-data and working directories."""
    config = util_mdtf.ConfigManager()
    case = DataManager(self.case_dict)
    pod = Diagnostic('AA')
    paths = config.paths.pod_paths(pod, case)
    expected = {
        'POD_CODE_DIR': 'TEST_CODE_ROOT/diagnostics/AA',
        'POD_OBS_DATA': 'TEST_OBS_DATA_ROOT/AA',
        'POD_WK_DIR': 'TEST_WORKING_DIR/MDTF_A_1900_2100/AA',
    }
    for key, value in expected.items():
        self.assertEqual(paths[key], value)
def test_parse_pod_varlist_freq(self):
    """An unrecognized varlist frequency fails Diagnostic's validation."""
    config = util_mdtf.ConfigManager(unittest=True)
    config.pods['DUMMY_POD'] = {
        'settings': {},
        'varlist': [{
            'var_name': 'pr_var',
            'freq': 'not_a_frequency'
        }]
    }
    # FIX: was Diagnostic('A'). 'A' is not registered in config.pods, so the
    # AssertionError came from Diagnostic's `assert pod_name in config.pods`
    # name lookup, not from the frequency check this test targets.
    self.assertRaises(AssertionError, Diagnostic, 'DUMMY_POD')
def test_pathmgr_global_asserterror(self):
    """paths.parse() raises AssertionError for these path settings."""
    settings = {
        'OBS_DATA_ROOT': 'B',
        'MODEL_DATA_ROOT': 'C',
        'WORKING_DIR': 'D',
        'OUTPUT_DIR': 'E',
    }
    config = util_mdtf.ConfigManager()
    self.assertRaises(
        AssertionError, config.paths.parse, settings, list(settings.keys()))
def convert_pod_figures(self, src_subdir, dest_subdir):
    """Convert all vector graphics in `POD_WK_DIR/src_subdir` to .png files
    using ghostscript.

    All vector graphics files (identified by extension) in any subdirectory
    of `POD_WK_DIR/src_subdir` are converted to .png files by running
    `ghostscript <https://www.ghostscript.com/>`__ in a subprocess.
    Ghostscript is included in the _MDTF_base conda environment. Afterwards,
    any bitmap files (identified by extension) in any subdirectory of
    `POD_WK_DIR/src_subdir` are moved to `POD_WK_DIR/dest_subdir`, preserving
    any subdirectories (see doc for :func:`~util.recursive_copy`.)

    Args:
        src_subdir: Subdirectory tree of `POD_WK_DIR` to search for vector
            graphics files.
        dest_subdir: Subdirectory tree of `POD_WK_DIR` to move converted
            bitmap files to.
    """
    config = util_mdtf.ConfigManager()
    abs_src_subdir = os.path.join(self.POD_WK_DIR, src_subdir)
    abs_dest_subdir = os.path.join(self.POD_WK_DIR, dest_subdir)
    files = util.find_files(
        abs_src_subdir,
        ['*.ps', '*.PS', '*.eps', '*.EPS', '*.pdf', '*.PDF'])
    for f in files:
        f_stem, _ = os.path.splitext(f)
        _ = util.run_shell_command(
            'gs {flags} -sOutputFile="{f_out}" {f_in}'.format(
                flags=config.config.get('convert_flags', ''),
                f_in=f, f_out=f_stem + '_MDTF_TEMP_%d.png'))
        # syntax for f_out above appends "_MDTF_TEMP" + page number to
        # output files. If input .ps/.pdf file had multiple pages, this will
        # generate 1 png per page. Page numbering starts at 1. Now check
        # how many files gs created:
        # FIX: the glob pattern was '_MDTF_TEMP_?.png', which only matches
        # single-digit page numbers and silently dropped pages 10 and up.
        # Match any page number and sort numerically so the renumbering
        # below preserves page order.
        out_files = glob.glob(f_stem + '_MDTF_TEMP_*.png')
        out_files.sort(
            key=lambda p: int(
                os.path.splitext(p)[0].rsplit('_MDTF_TEMP_', 1)[-1]))
        if not out_files:
            raise OSError("Error: no png generated from {}".format(f))
        elif len(out_files) == 1:
            # got one .png, so remove suffix.
            os.rename(out_files[0], f_stem + '.png')
        else:
            # Multiple .pngs. Drop the MDTF_TEMP suffix and renumber starting
            # from zero (forget which POD requires this.)
            for n, tmp_file in enumerate(out_files):
                os.rename(tmp_file, f_stem + '-{}.png'.format(n))
    # move converted figures and any figures that were saved directly as bitmaps
    files = util.find_files(
        abs_src_subdir, ['*.png', '*.gif', '*.jpg', '*.jpeg'])
    util.recursive_copy(
        files, abs_src_subdir, abs_dest_subdir,
        copy_function=shutil.move, overwrite=False)
def __init__(self, verbose=0):
    """Initialize the manager and register subprocess cleanup handlers."""
    config = util_mdtf.ConfigManager()
    self.test_mode = config.config.test_mode
    self.pods = []
    self.envs = set()
    # kill any subprocesses that are still active if we exit normally
    # (shouldn't be necessary) or are killed
    atexit.register(self.subprocess_cleanup)
    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, self.subprocess_cleanup)
def setUp(self, verbose=0):
    """Run the parent setUp, then create the POD's remote output directory.

    Raises:
        PodRequirementFailure: Propagated unchanged from the parent setUp.
    """
    config = util_mdtf.ConfigManager()
    # FIX: removed a no-op ``try/except PodRequirementFailure: raise``
    # wrapper — the exception propagates identically without it, and
    # _has_placeholder is still only set on success.
    super(GfdlDiagnostic, self).setUp(verbose)
    make_remote_dir(
        self.POD_OUT_DIR,
        timeout=config.config.get('file_transfer_timeout', 0),
        dry_run=config.config.get('dry_run', False))
    self._has_placeholder = True
def test_check_pod_driver_no_program_1(self, mock_exists):
    """_check_pod_driver fails when the driver's extension is unrecognized."""
    config = util_mdtf.ConfigManager(unittest=True)
    config.pods['DUMMY_POD'] = {
        'settings': {'driver': 'C.foo'},
        'varlist': [],
    }
    pod = Diagnostic('DUMMY_POD')
    with self.assertRaises(PodRequirementFailure):
        pod._check_pod_driver()
def test_parse_pod_settings(self):
    """Entries in the settings dict become attributes on the Diagnostic."""
    # normal operation
    config = util_mdtf.ConfigManager(unittest=True)
    config.pods['DUMMY_POD'] = {
        'settings': {'required_programs': 'B'},
        'varlist': [],
    }
    pod = Diagnostic('DUMMY_POD')
    self.assertEqual('DUMMY_POD', pod.name)
    self.assertEqual('B', pod.required_programs)
def _make_tar_file(self, tar_dest_dir):
    """Build the tar file locally, then gcp it to its final destination.

    The tar is created in WORKING_DIR and copied with GCP because
    OUTPUT_DIR might be mounted read-only. Returns the path of the copied
    tar file under tar_dest_dir.
    """
    config = util_mdtf.ConfigManager()
    local_tar = super(GfdlarchiveDataManager, self)._make_tar_file(
        config.paths.WORKING_DIR)
    gcp_wrapper(
        local_tar, tar_dest_dir,
        timeout=self.file_transfer_timeout, dry_run=self.dry_run)
    return os.path.join(tar_dest_dir, os.path.basename(local_tar))
def test_parse_pod_varlist(self):
    """A 'requirement': 'required' entry is normalized to required=True."""
    # normal operation
    config = util_mdtf.ConfigManager(unittest=True)
    config.pods['DUMMY_POD'] = {
        'settings': {},
        'varlist': [{
            'var_name': 'pr_var',
            'freq': 'mon',
            'requirement': 'required',
        }],
    }
    pod = Diagnostic('DUMMY_POD')
    self.assertEqual(True, pod.varlist[0]['required'])
def test_check_pod_driver_program(self, mock_exists):
    """_check_pod_driver absolutizes the driver path and infers the program."""
    # fill in absolute path and fill in program from driver's extension
    config = util_mdtf.ConfigManager(unittest=True)
    config.pods['DUMMY_POD'] = {
        'settings': {'driver': 'C.ncl'},
        'varlist': [],
    }
    pod = Diagnostic('DUMMY_POD')
    pod._check_pod_driver()
    # NOTE(review): the expected path uses 'diagnostics/A' although the pod
    # is named 'DUMMY_POD' — presumably from the unittest path stubs; confirm.
    self.assertEqual('TEST_CODE_ROOT/diagnostics/A/C.ncl', pod.driver)
    self.assertEqual('ncl', pod.program)
def test_check_for_varlist_files_found(self, mock_isfile):
    """A required variable whose file exists lands in `found`, not `missing`."""
    # case file is found
    config = util_mdtf.ConfigManager(unittest=True)
    config.pods['DUMMY_POD'] = {
        'settings': {},
        'varlist': [{'var_name': 'pr_var', 'freq': 'mon'}],
    }
    pod = Diagnostic('DUMMY_POD')
    self._populate_pod__local_data(pod)
    found, missing = pod._check_for_varlist_files(pod.varlist)
    self.assertEqual(['TEST_MODEL_DATA_ROOT/A/mon/A.PRECT.mon.nc'], found)
    self.assertEqual([], missing)
def __init__(self, case_dict, DateFreqMixin=None):
    """Base initializer for model-data managers.

    Args:
        case_dict: Dict of per-case settings (CASENAME, model, FIRSTYR,
            LASTYR, pod_list; optional 'convention' and 'data_freq').
        DateFreqMixin: Optional class used in place of
            datelabel.DateFrequency when parsing 'data_freq'.
    """
    if not DateFreqMixin:
        self.DateFreq = datelabel.DateFrequency
    else:
        self.DateFreq = DateFreqMixin

    self.case_name = case_dict['CASENAME']
    self.model_name = case_dict['model']
    self.firstyr = datelabel.Date(case_dict['FIRSTYR'])
    self.lastyr = datelabel.Date(case_dict['LASTYR'])
    self.date_range = datelabel.DateRange(self.firstyr, self.lastyr)
    self.convention = case_dict.get('convention', 'CF')
    if 'data_freq' in case_dict:
        self.data_freq = self.DateFreq(case_dict['data_freq'])
    else:
        self.data_freq = None
    self.pod_list = case_dict['pod_list']
    self.pods = []

    config = util_mdtf.ConfigManager()
    self.envvars = config.global_envvars.copy()  # gets appended to
    # assign explicitly else linter complains
    self.dry_run = config.config.dry_run
    self.file_transfer_timeout = config.config.file_transfer_timeout
    self.make_variab_tar = config.config.make_variab_tar
    self.keep_temp = config.config.keep_temp
    self.overwrite = config.config.overwrite
    self.file_overwrite = self.overwrite  # overwrite config and .tar

    d = config.paths.model_paths(case_dict, overwrite=self.overwrite)
    self.code_root = config.paths.CODE_ROOT
    self.MODEL_DATA_DIR = d.MODEL_DATA_DIR
    self.MODEL_WK_DIR = d.MODEL_WK_DIR
    self.MODEL_OUT_DIR = d.MODEL_OUT_DIR
    self.TEMP_HTML = os.path.join(self.MODEL_WK_DIR, 'pod_output_temp.html')

    # dynamic inheritance to add netcdf manipulation functions
    # source: https://stackoverflow.com/a/8545134
    # mixin = config.config.get(netcdf_helper, 'NcoNetcdfHelper')
    # hardwire now, since NCO is all that's implemented
    mixin = getattr(netcdf_helper, 'NcoNetcdfHelper')
    self.__class__ = type(
        self.__class__.__name__, (self.__class__, mixin), {})
    # FIX: removed a no-op ``try/except Exception: raise`` wrapper; any
    # failure in the dependency check propagates identically without it.
    self.nc_check_environ()  # make sure we have dependencies
def test_check_for_varlist_files_optional(self, mock_isfile):
    """An optional variable that's absent is neither found nor missing."""
    # case file is optional and not found
    config = util_mdtf.ConfigManager(unittest=True)
    config.pods['DUMMY_POD'] = {
        'settings': {},
        'varlist': [
            {'var_name': 'pr_var', 'freq': 'mon', 'required': False}
        ],
    }
    pod = Diagnostic('DUMMY_POD')
    self._populate_pod__local_data(pod)
    found, missing = pod._check_for_varlist_files(pod.varlist)
    self.assertEqual([], found)
    self.assertEqual([], missing)
def _framework_init(self, code_root, defaults_rel_path):
    """Parse CLI arguments, load POD settings, and build the ConfigManager."""
    # set up CLI and parse arguments
    # print('\tDEBUG: argv = {}'.format(sys.argv[1:]))
    cli_handler = cli.FrameworkCLIHandler(code_root, defaults_rel_path)
    self._cli_pre_parse_hook(cli_handler)
    cli_handler.parse_cli()
    self._cli_post_parse_hook(cli_handler)
    # load pod data
    pod_info = cli.load_pod_settings(code_root)
    # do nontrivial parsing
    config = util_mdtf.ConfigManager(cli_handler, pod_info)
    print(util.pretty_print_json(config.paths))
    self.parse_mdtf_args(cli_handler, config)
    # config should be read-only from here on
    self._post_parse_hook(cli_handler, config)
    self._print_config(cli_handler, config)
def test_set_pod_env_vars_vars(self, mock_exists):
    """Extra pod_env_vars land both in os.environ and on the pod object."""
    # check definition of additional env vars
    config = util_mdtf.ConfigManager(unittest=True)
    config.pods['DUMMY_POD'] = {
        'settings': {'pod_env_vars': {'D': 'E'}},
        'varlist': [],
    }
    pod = Diagnostic('DUMMY_POD')
    pod.POD_WK_DIR = 'A'
    pod._set_pod_env_vars()
    self.assertEqual('E', os.environ['D'])
    self.assertEqual('E', pod.pod_env_vars['D'])
def main_loop(self):
    """Set up each case, instantiate its PODs, and fetch its data.

    Only the first case in case_list is run until the dependence on
    environment variables is cleaned up (see comment below).
    """
    config = util_mdtf.ConfigManager()
    self.manual_dispatch(config)
    caselist = []
    # only run first case in list until dependence on env vars cleaned up
    for case_dict in self.case_list[0:1]:
        case = self.DataManager(case_dict)
        for pod_name in case.pod_list:
            try:
                pod = self.Diagnostic(pod_name)
            except AssertionError as error:
                print(str(error))
                # FIX: skip this POD when its constructor fails. Previously
                # execution fell through to the append below, adding a `pod`
                # that was either undefined (NameError on the first
                # iteration) or a stale Diagnostic from a prior iteration.
                continue
            case.pods.append(pod)
        case.setUp()
        case.fetch_data()
        caselist.append(case)
def tearDown_ConfigManager():
    """Reset the ConfigManager, VariableTranslator and TempDirManager Singletons.

    Best-effort: a Singleton that was never instantiated (or whose reset
    fails) is skipped so test teardown always completes.
    """
    # clear Singletons
    # FIX: the original bare ``except:`` clauses also swallowed SystemExit
    # and KeyboardInterrupt; narrow each handler to Exception.
    try:
        temp = util_mdtf.ConfigManager(unittest=True)
        temp._reset()
    except Exception:
        pass
    try:
        temp = util_mdtf.VariableTranslator(unittest=True)
        temp._reset()
    except Exception:
        pass
    try:
        temp = util_mdtf.TempDirManager()
        temp._reset()
    except Exception:
        pass
def make_remote_dir(dest_dir, timeout=None, dry_run=None):
    """Create dest_dir, falling back to a GCP copy if plain mkdir fails.

    The fallback exists because the output dir might be on a read-only
    filesystem; catching the OSError turned out to be more robust than
    probing with os.access. timeout/dry_run default to the values in the
    global ConfigManager when not given.
    """
    try:
        os.makedirs(dest_dir)
    except OSError:
        # use GCP for this because output dir might be on a read-only
        # filesystem: stage an empty dir locally, then gcp it into place
        config = util_mdtf.ConfigManager()
        tmpdirs = util_mdtf.TempDirManager()
        if timeout is None:
            timeout = config.config.get('file_transfer_timeout', 0)
        if dry_run is None:
            dry_run = config.config.get('dry_run', False)
        staging_dir = os.path.join(
            tmpdirs.make_tempdir(), os.path.basename(dest_dir))
        os.makedirs(staging_dir)
        gcp_wrapper(staging_dir, dest_dir, timeout=timeout, dry_run=dry_run)
def test_check_for_varlist_files_alternate(self, mock_isfile):
    """When the primary variable is absent, its alternate is found instead."""
    # case alternate variable is specified and found
    config = util_mdtf.ConfigManager(unittest=True)
    config.pods['DUMMY_POD'] = {
        'settings': {},
        'varlist': [{
            'var_name': 'pr_var',
            'freq': 'mon',
            'required': True,
            'alternates': ['prc_var'],
        }],
    }
    pod = Diagnostic('DUMMY_POD')
    self._populate_pod__local_data(pod)
    found, missing = pod._check_for_varlist_files(pod.varlist)
    # name_in_model translation now done in DataManager._setup_pod
    self.assertEqual(['TEST_MODEL_DATA_ROOT/A/mon/A.PRECC.mon.nc'], found)
    self.assertEqual([], missing)
def make_pod_html(self):
    """Perform templating on POD's html results page(s).

    A wrapper for :func:`~util_mdtf.append_html_template`. Looks for all
    html files in POD_CODE_DIR, templates them, and copies them to
    POD_WK_DIR, respecting subdirectory structure (see doc for
    :func:`~util.recursive_copy`).
    """
    config = util_mdtf.ConfigManager()
    template_vars = config.global_envvars.copy()
    template_vars.update(self.pod_env_vars)

    def _template_and_write(src, dest):
        # append=False: replace any html already present at dest
        return util_mdtf.append_html_template(
            src, dest, template_dict=template_vars, append=False)

    html_files = util.find_files(self.POD_CODE_DIR, '*.html')
    util.recursive_copy(
        html_files, self.POD_CODE_DIR, self.POD_WK_DIR,
        copy_function=_template_and_write, overwrite=True)
def _populate_pod__local_data(self, pod):
    """Fill in name_in_model and _local_data paths on the pod's varlist."""
    # reproduce logic in DataManager._setup_pod rather than invoke it here
    config = util_mdtf.ConfigManager(unittest=True)
    translate = util_mdtf.VariableTranslator(unittest=True)
    case_name = 'A'
    pod.varlist = [
        DataSet.from_pod_varlist(
            pod.convention, var, {'DateFreqMixin': DateFrequency})
        for var in pod.varlist
    ]
    for var in pod.iter_vars_and_alts():
        var.name_in_model = translate.fromCF('not_CF', var.CF_name)
        freq = var.date_freq.format_local()
        var._local_data = os.path.join(
            config.paths.MODEL_DATA_ROOT, case_name, freq,
            "{}.{}.{}.nc".format(case_name, var.name_in_model, freq))
def cleanup_pod_files(self):
    """Copy and remove remaining files to `POD_WK_DIR`.

    In order, this 1) copies .pdf documentation (if any) from
    `POD_CODE_DIR/doc`, 2) copies any bitmap figures in any subdirectory of
    `POD_OBS_DATA` to `POD_WK_DIR/obs` (needed for legacy PODs without
    digested observational data), 3) removes vector graphics if requested,
    4) removes netCDF scratch files in `POD_WK_DIR` if requested.

    Settings are set at runtime, when :class:`~util_mdtf.ConfigManager` is
    initialized.
    """
    config = util_mdtf.ConfigManager()
    # 1) copy PDF documentation (if any) to output
    for pdf in util.find_files(
            os.path.join(self.POD_CODE_DIR, 'doc'), '*.pdf'):
        shutil.copy2(pdf, self.POD_WK_DIR)
    # 2) copy premade figures (if any) to output
    # NOTE: all matches are copied into obs/ as a flat directory — any
    # subdirectory structure under POD_OBS_DATA is not preserved
    obs_dir = os.path.join(self.POD_WK_DIR, 'obs')
    for fig in util.find_files(
            self.POD_OBS_DATA, ['*.gif', '*.png', '*.jpg', '*.jpeg']):
        shutil.copy2(fig, obs_dir)
    # 3) remove .eps files if requested (actually, contents of any 'PS' subdirs)
    if not config.config.save_ps:
        for ps_dir in util.find_files(self.POD_WK_DIR, 'PS' + os.sep):
            shutil.rmtree(ps_dir)
    # 4) delete netCDF files, keep everything else
    if config.config.save_non_nc:
        for nc_file in util.find_files(self.POD_WK_DIR, '*.nc'):
            os.remove(nc_file)
    # ...or delete all generated data (contents of any 'netCDF' subdirs)
    elif not config.config.save_nc:
        for nc_dir in util.find_files(self.POD_WK_DIR, 'netCDF' + os.sep):
            shutil.rmtree(nc_dir)