def check_args_common(args):
    """Validate the parser arguments shared by every ORAC script.

    Normalises the input/output directories, builds the FileName object
    describing the target granule, and checks that the required
    directories and library file exist.  Mutates ``args`` in place.
    """
    from os import makedirs
    from os.path import dirname, basename
    from pyorac.definitions import FileName
    from pyorac.local_defaults import channels

    # Without an explicit input folder, split one out of the target path.
    if args.in_dir is None:
        args.in_dir = [dirname(args.target)]
        args.target = basename(args.target)
    elif "/" in args.target:
        raise BadValue("file target", "contains a /")

    if args.out_dir is None:
        args.out_dir = args.in_dir[0]

    args.File = FileName(args.in_dir, args.target)

    # Default to every channel this sensor provides.
    if args.available_channels is None:
        args.available_channels = channels[args.File.sensor]

    for folder in args.in_dir:
        if not isdir(folder):
            raise FileMissing('in_dir', folder)
    if not isdir(args.out_dir):
        makedirs(args.out_dir, 0o774)
    if not isdir(args.orac_dir):
        raise FileMissing('ORAC repository directory', args.orac_dir)
    if not isfile(args.orac_lib):
        raise FileMissing('ORAC library file', args.orac_lib)
def check_args_preproc(args):
    """Ensure preprocessor parser arguments are valid.

    Merges the global attributes and auxiliary file locations from
    local_defaults into ``args``, sanity-checks the --limit values,
    copies file metadata onto the FileName object, and verifies the
    RTTOV/EMOS directories exist.  Mutates ``args`` in place.
    """
    from pyorac.definitions import FileName
    from pyorac.local_defaults import auxiliaries, global_attributes
    from pyorac.util import get_repository_revision

    # Add global attributes
    global_attributes.update({key: val for key, val in args.global_att})
    args.__dict__.update(global_attributes)

    # Insert auxilliary locations
    auxiliaries.update({key: val for key, val in args.aux})
    args.__dict__.update(auxiliaries)

    try:
        # When using ecmwf_dir to set a single directory
        args.ggam_dir = args.ecmwf_dir
        args.ggas_dir = args.ecmwf_dir
        args.spam_dir = args.ecmwf_dir
    except AttributeError:
        # No single ECMWF directory given; keep the individual ggam/ggas/
        # spam directories.  (Was a bare except, which hid real errors.)
        pass

    # Limit should either be all zero or all non-zero.
    limit_check = args.limit[0] == 0
    for limit_element in args.limit[1:]:
        if (limit_element == 0) ^ limit_check:
            warnings.warn('All elements of --limit should be non-zero.',
                          OracWarning, stacklevel=2)

    # Update FileName class; fall back on the repository revision when the
    # filename does not encode one.
    if args.revision is None:
        try:
            args.revision = args.File.revision
        except AttributeError:
            args.revision = get_repository_revision()

    if "revision" not in args.File.__dict__:
        args.File.revision = args.revision
    if "processor" not in args.File.__dict__:
        args.File.processor = args.processor
    if "project" not in args.File.__dict__:
        args.File.project = args.project
    if "product_name" not in args.File.__dict__:
        args.File.product_name = args.product_name

    if args.File.predef and args.l1_land_mask and not args.no_predef:
        raise ValueError("Do not set --l1_land_mask while using predefined "
                         "geostationary geolocation.")

    if not isdir(args.atlas_dir):
        raise FileMissing('RTTOV Atlas directory', args.atlas_dir)
    #if not isfile(args.calib_file):
    #    raise FileMissing('AATSR calibration file', args.calib_file)
    if not isdir(args.coef_dir):
        raise FileMissing('RTTOV coefficients directory', args.coef_dir)
    if not isdir(args.emis_dir):
        raise FileMissing('RTTOV emissivity directory', args.emis_dir)
    if not isdir(args.emos_dir):
        raise FileMissing('EMOS temporary directory', args.emos_dir)
def check_args_main(args):
    """Validate the arguments specific to the main processor.

    Checks that the preprocessed and SAD directories exist, defaults the
    retrieval channels to everything available, and forces the two-layer
    approach when a second cloud layer was requested.  Mutates ``args``.
    """
    if len(args.in_dir) > 1:
        warnings.warn('Main processor ignores all but first in_dir.',
                      OracWarning, stacklevel=2)
    if not isdir(args.in_dir[0]):
        raise FileMissing('Preprocessed directory', args.in_dir[0])

    absent = [folder for folder in args.sad_dirs if not isdir(folder)]
    if absent:
        raise FileMissing('sad_dirs', absent[0])

    # By default, retrieve with every channel present in the preprocessed
    # files.
    if args.use_channels is None:
        args.use_channels = args.available_channels

    # A second cloud layer implies the two-layer retrieval approach.
    if args.multilayer is not None:
        args.approach = "AppCld2L"
def _date_back_search(fdr, date, pattern):
    """Search a folder for the file with timestamp closest before a given date.

    Steps backwards one day at a time from *date* until a file matching
    *pattern* is found.  When no files for the candidate year exist at all,
    the search falls back on climatological files, which use 'XXXX' in
    place of the year in their names, and keeps stepping back through them.

    Args:
    :str fdr: Folder to be searched.
    :datetime date: Initial date to consider.
    :str pattern: strftime format string used to parse filename.

    Raises FileMissing when neither dated nor climatological files match.
    """
    from copy import copy

    dt = copy(date)
    pttrn = copy(pattern)
    try_climat = True
    while True:
        # Look for a file stamped with the current candidate date.
        files = glob(dt.strftime(os.path.join(fdr, pttrn)))
        if len(files) >= 1:
            return files[-1]
        elif try_climat:
            if glob(dt.strftime(os.path.join(fdr, '*%Y*'))):
                # Files for this year exist, so an earlier date may match.
                dt -= timedelta(days=1)
            else:
                # Nothing at all for this year; switch to the
                # climatological naming (year replaced by 'XXXX').
                pttrn = pttrn.replace('%Y', 'XXXX')
                try_climat = False
        else:
            # NOTE(review): if climatological files exist but never match
            # pttrn, this loop only terminates when the '*%Y*' glob above
            # eventually failed — confirm there is no pathological folder
            # layout that loops indefinitely.
            if glob(os.path.join(fdr, '*XXXX*')):
                dt -= timedelta(days=1)
            else:
                raise FileMissing(fdr, pattern)
def check_args_cc4cl(args):
    """Ensure ORAC suite wrapper parser arguments are valid.

    Merges extra-lines file definitions into ``args``, creates the batch
    log directory when needed, and resolves the retrieval settings from
    (in priority order) a named preset, a settings file, or the sensor
    default.  Returns ``args`` so callers can rebind it, matching the
    other ``check_args_*`` helpers.
    """
    from os import makedirs
    from os.path import isdir, join
    from pyorac.local_defaults import log_dir, extra_lines, retrieval_settings

    # Add extra lines files
    extra_lines.update({key + '_extra': val for key, val in args.extra_lines})
    args.__dict__.update(extra_lines)

    log_path = join(args.out_dir, log_dir)
    if args.batch and not isdir(log_path):
        makedirs(log_path, 0o774)

    if args.preset_settings is not None:
        # A procedure named in local_defaults
        try:
            args.settings = retrieval_settings[args.preset_settings]
        except KeyError:
            raise BadValue("preset settings", "not defined in local_defaults")
    elif args.settings_file is not None:
        # A procedure outlined in a file, one retrieval per line
        try:
            with open(args.settings_file) as settings_file:
                args.settings = settings_file.read().splitlines()
        except IOError:
            raise FileMissing('Description of settings', args.settings_file)
    elif args.settings is None:
        # Default procedure for this sensor
        args.settings = retrieval_settings[args.File.sensor]

    # Return args for consistency with the other check_args_* functions
    return args
def check_args_postproc(args):
    """Validate the arguments used by the postprocessor.

    Every entry of ``args.in_dir`` must be an existing directory of
    processed output.  Returns ``args`` unchanged.
    """
    missing = [fdr for fdr in args.in_dir if not os.path.isdir(fdr)]
    if missing:
        raise FileMissing('Processed output directory', missing[0])

    return args
def build_postproc_driver(args, files):
    """Return the text of a postprocessor driver file.

    Args:
    :object args: Parsed command-line arguments.
    :list files: Primary files to combine; water first, then ice, then
        any additional phases.
    """
    # Standard Cloud CCI work (exactly one water and one ice file) keeps
    # the original phase-switching selection; everything else uses the
    # Bayesian selection.
    cci_cloud = (len(files) == 2
                 and "wat" in files[0].lower()
                 and "ice" in files[1].lower())

    wat_pri, ice_pri = files[0], files[1]
    driver = f"""{args.approach == 'AppCld2L'}
{wat_pri}
{ice_pri}
{wat_pri.replace('primary', 'secondary')}
{ice_pri.replace('primary', 'secondary')}
{args.target}
{args.target.replace('primary', 'secondary')}
{not args.no_switch_phase}
COST_THRESH={args.cost_thresh}
NORM_PROB_THRESH={args.prob_thresh}
OUTPUT_OPTICAL_PROPS_AT_NIGHT={not args.no_night_opt}
VERBOSE={args.verbose}
USE_CHUNKING={args.chunking}
USE_NETCDF_COMPRESSION={args.compress}
USE_NEW_BAYESIAN_SELECTION={not cci_cloud}"""

    # Each further phase contributes its primary and secondary files.
    for extra in files[2:]:
        driver += '\n' + extra + '\n' + extra.replace('primary', 'secondary')

    # Verbatim extra driver lines requested for the postprocessor.
    for part, path in args.extra_lines:
        if part == "post" and path != "":
            try:
                with open(path, "r") as extra_file:
                    driver += "\n" + extra_file.read()
            except IOError:
                raise FileMissing('extra_lines_file', path)

    # Individual key = value overrides for the postprocessor section.
    for section, key, val in args.additional:
        if section == "post":
            driver += f"\n{key} = {val}"

    return driver
def build_postproc_driver(args, files):
    """Return the text of a postprocessor driver file.

    Args:
    :object args: Parsed command-line arguments.
    :list files: Primary files to combine, ordered to match args.phases
        (located by the caller, e.g. process_post).
    """
    wat_pri, ice_pri = files[0], files[1]

    # Mandatory header: one value per line, in the order the
    # postprocessor reads them.
    header = [
        str(args.approach == 'AppCld2L'),
        wat_pri,
        ice_pri,
        wat_pri.replace('primary', 'secondary'),
        ice_pri.replace('primary', 'secondary'),
        args.target,
        args.target.replace('primary', 'secondary'),
        str(args.switch_phase),
        f"COST_THRESH={args.cost_thresh}",
        f"NORM_PROB_THRESH={args.prob_thresh}",
        f"OUTPUT_OPTICAL_PROPS_AT_NIGHT={not args.no_night_opt}",
        f"VERBOSE={args.verbose}",
        f"USE_CHUNKING={args.chunking}",
        f"USE_NETCDF_COMPRESSION={args.compress}",
        # Bayesian selection for all but standard Cloud CCI phases
        f"USE_NEW_BAYESIAN_SELECTION={args.phases != ['WAT', 'ICE']}",
    ]

    # Each further phase contributes its primary and secondary files.
    for extra in files[2:]:
        header.append(extra)
        header.append(extra.replace('primary', 'secondary'))

    driver = '\n'.join(header)

    # Verbatim extra driver lines requested for the postprocessor.
    for part, path in args.extra_lines:
        if part == "post" and path != "":
            try:
                with open(path, "r") as extra_file:
                    driver += "\n" + extra_file.read()
            except IOError:
                raise FileMissing('extra_lines_file', path)

    # Individual key = value overrides for the postprocessor section.
    for section, key, val in args.additional:
        if section == "post":
            driver += f"\n{key} = {val}"

    return driver
def process_post(args, log_path, files=None, dependency=None, tag='post'):
    """Call sequence for post processor.

    Args:
    :object args: Parsed command-line arguments.
    :str log_path: Folder to receive batch log/error files.
    :list files: Primary files to combine; when None, they are located
        by globbing args.in_dir for the requested phases.
    :object dependency: Batch job(s) this call should wait for, passed
        through to the batch values dict (presumably a job id; confirm
        against call_exe).
    :str tag: Label used to form the batch job name.

    Returns a (job id, output filename) pair; the job id is None when an
    existing output file was kept.
    """
    from glob import glob
    from pyorac.drivers import build_postproc_driver
    from pyorac.definitions import FileMissing, SETTINGS
    from pyorac.local_defaults import DIR_PERMISSIONS

    args = check_args_postproc(args)
    job_name = args.File.job_name(args.revision, tag)
    root_name = args.File.root_name(args.revision)

    if not os.path.isdir(args.out_dir):
        os.makedirs(args.out_dir, DIR_PERMISSIONS)

    if files is None:
        # Find all primary files of requested phases in given input folders.
        files = []
        for phs in set(args.phases):
            for fdr in args.in_dir:
                files.extend(
                    glob(
                        os.path.join(
                            fdr,
                            root_name + SETTINGS[phs].name + '.primary.nc')))

        # The postprocessor combines at least two phases.
        if len(files) < 2:
            raise FileMissing('sufficient processed files', args.target)

    # filter(None, ...) drops an empty suffix so we don't emit '..'
    out_file = os.path.join(
        args.out_dir,
        '.'.join(filter(None, (root_name, args.suffix, 'primary', 'nc'))))

    # Only (re)run when clobbering is allowed or no output exists yet.
    # NOTE(review): CLOBBER and call_exe are module-level names defined
    # outside this chunk.
    if args.clobber >= CLOBBER['post'] or not os.path.isfile(out_file):
        # Settings for batch processing
        values = {
            'job_name': job_name,
            'log_file': os.path.join(log_path, job_name + '.log'),
            'err_file': os.path.join(log_path, job_name + '.err'),
            'duration': args.dur[2],
            'ram': args.ram[2]
        }
        if dependency is not None:
            values['depend'] = dependency

        args.target = out_file
        driver = build_postproc_driver(args, files)

        # Newer builds place the executable at the repository root.
        exe = os.path.join(args.orac_dir, 'post_processing', 'orac_postproc')
        if not os.path.isfile(exe):
            exe = os.path.join(args.orac_dir, 'orac_postproc')

        jid = call_exe(args, exe, driver, values)
    else:
        jid = None

    return jid, out_file
def _glob_dirs(dirs, path, desc):
    """Return the most recently modified file matching *path* in *dirs*.

    Args:
    :list dirs: Directories to search.
    :str path: Glob pattern to match within each directory.
    :str desc: Description of the file, used in the error message.

    Raises FileMissing when no directory contains a match.
    """
    from os import stat

    matches = []
    for folder in dirs:
        matches.extend(glob(os.path.join(folder, path)))

    if not matches:
        raise FileMissing(desc, path)

    # Prefer the newest match by modification time.
    return max(matches, key=lambda name: stat(name).st_mtime)
def _form_bound_filenames(bounds, fdr, form):
    """Form 2-element lists of filenames from bounding timestamps.

    Args:
    :list bounds: Timestamps to format.
    :str fdr: Folder containing BADC files.
    :str form: Formatting string for strftime"""
    resolved = []
    for stamp in bounds:
        pattern = stamp.strftime(os.path.join(fdr, form))
        matches = glob(pattern)
        if not matches:
            raise FileMissing('ECMWF file', pattern)
        resolved.append(matches[-1])

    return resolved
def _form_bound_filenames(bounds, fdr, form):
    """Form 2-element lists of filenames from bounding timestamps.

    Args:
    :list bounds: Timestamps to format.
    :str fdr: Folder containing BADC files.
    :str form: Formatting string for strftime"""
    patterns = [stamp.strftime(os.path.join(fdr, form)) for stamp in bounds]

    resolved = []
    for pattern in patterns:
        matches = glob(pattern)
        if not matches:
            raise FileMissing('ECMWF file', pattern)
        # Take the last match (mirrors the behaviour of sorted globbing)
        resolved.append(matches[-1])

    return resolved
def _date_back_search(fdr, date_in, pattern, interval):
    """Search a folder for the file with timestamp closest before a given date.

    Args:
    :str fdr: Folder to be searched.
    :datetime date_in: Initial date to consider.
    :str pattern: strftime format string used to parse filename.
    :str interval: Keyword of relativedelta indicating interval to step back.
    """
    from copy import copy
    from dateutil.relativedelta import relativedelta

    step = relativedelta(**{interval: 1})

    # Bound the search: a month of days, a year of months, otherwise all
    # the way back to the epoch.
    date = copy(date_in)
    if interval == 'days':
        earliest = date - relativedelta(months=1)
    elif interval == 'months':
        earliest = date - relativedelta(years=1)
    else:
        earliest = datetime(1970, 1, 1)

    # Walk backwards until a dated file turns up.
    while date > earliest:
        matches = glob(date.strftime(os.path.join(fdr, pattern)))
        if matches:
            return matches[-1]
        date -= step

    # Fall back on a climatological file, marked by XXXX in place of the year.
    climatology = glob(
        date.strftime(os.path.join(fdr, pattern.replace('%Y', 'XXXX'))))
    if climatology:
        return climatology[-1]

    raise FileMissing(fdr, pattern)
def check_args_cc4cl(args):
    """Validate the ORAC suite wrapper arguments.

    Merges extra-lines file definitions into ``args``, creates the batch
    log directory when needed, and resolves the retrieval settings from
    (in priority order) a settings file, a named preset, or a default.
    Returns ``args``.
    """
    from pyorac.local_defaults import log_dir, extra_lines, retrieval_settings

    # Merge requested extra-lines files into the argument namespace.
    extra_lines.update({key + '_extra': val for key, val in args.extra_lines})
    args.__dict__.update(extra_lines)

    log_path = os.path.join(args.out_dir, log_dir)
    if args.batch and not os.path.isdir(log_path):
        os.makedirs(log_path, 0o774)

    if args.settings_file is not None:
        # A procedure outlined in a file
        try:
            with open(args.settings_file) as handle:
                args.settings = handle.read().splitlines()
        except IOError:
            raise FileMissing('Description of settings', args.settings_file)
        return args

    if args.preset_settings is not None:
        # A procedure named in local_defaults
        try:
            args.settings = retrieval_settings[args.preset_settings]
        except KeyError:
            raise BadValue("preset settings", "not defined in local_defaults")
        return args

    if args.settings is None:
        if args.phase is None:
            # Default procedure for this sensor from local_defaults
            args.settings = retrieval_settings[args.File.sensor]
        else:
            # Process a single type
            args.settings = (" ", )

    return args
def build_main_driver(args):
    """Prepare a driver file for the main processor.

    Builds the mandatory Ctrl%... header from ``args``, then appends the
    optional sections (netCDF LUT paths, second-layer settings, type
    selection, approach/class overrides, per-phase inversion variables,
    extra lines files, and key = value overrides).  Returns the driver
    text as a string.
    """
    from pyorac.definitions import SETTINGS

    # Form mandatory driver file lines
    driver = """# ORAC New Driver File
Ctrl%FID%Data_Dir = "{in_dir}"
Ctrl%FID%Filename = "{fileroot}"
Ctrl%FID%Out_Dir = "{out_dir}"
Ctrl%FID%SAD_Dir = "{sad_dir}"
Ctrl%InstName = "{sensor}"
Ctrl%Ind%NAvail = {nch}
Ctrl%Ind%Channel_Proc_Flag = {channels}
Ctrl%LUTClass = "{phase}"
Ctrl%Process_Cloudy_Only = {cloudy}
Ctrl%Process_Aerosol_Only = {aerosoly}
Ctrl%Verbose = {verbose}
Ctrl%RS%Use_Full_BRDF = {use_brdf}""".format(
        aerosoly=args.aerosol_only,
        # 1/0 flag per available channel, marking those actually used
        channels=','.join('1' if k in args.use_channels else '0'
                          for k in args.available_channels),
        cloudy=args.cloud_only,
        fileroot=args.File.root_name(),
        # Main processor only reads the first input directory
        in_dir=args.in_dir[0],
        nch=len(args.available_channels),
        out_dir=args.out_dir,
        phase=SETTINGS[args.phase].name,
        sad_dir=SETTINGS[args.phase].sad_dir(args.sad_dirs, args.File),
        sensor=args.File.inst,
        # Swansea aerosol retrievals assume a Lambertian surface
        use_brdf=not (args.lambertian or args.approach == 'AppAerSw'),
        verbose=args.verbose,
    )

    # If a netcdf LUT is being used then write NCDF LUT filename
    if SETTINGS[args.phase].sad == 'netcdf':
        driver += """
Ctrl%FID%NCDF_LUT_Filename = "{ncdf_lut_filename}"
""".format(ncdf_lut_filename=SETTINGS[args.phase].sad_filename(args.File))

    # Optional driver file lines
    if args.multilayer is not None:
        # Settings for the lower cloud layer: LUT, SAD directory, class
        if SETTINGS[args.phase].sad == 'netcdf':
            driver += """
Ctrl%FID%NCDF_LUT_Filename2 = "{ncdf_lut_filename}"
""".format(ncdf_lut_filename=SETTINGS[args.multilayer[0]].sad_filename(args.File))
        driver += """
Ctrl%LUTClass2 = "{}"
Ctrl%FID%SAD_Dir2 = "{}"
Ctrl%Class2 = {}""".format(
            SETTINGS[args.multilayer[0]].name,
            SETTINGS[args.multilayer[0]].sad_dir(args.sad_dirs, args.File),
            args.multilayer[1],
        )
        # Inversion variable settings for the second layer's phase
        for var in SETTINGS[args.multilayer[0]].inv:
            driver += var.driver()

    if args.types:
        driver += "\nCtrl%NTypes_To_Process = {:d}".format(len(args.types))
        driver += ("\nCtrl%Types_To_Process(1:{:d}) = ".format(len(args.types))
                   + ','.join(k + '_TYPE' for k in args.types))
    if args.sabotage:
        driver += "\nCtrl%Sabotage_Inputs = true"
    if args.approach:
        driver += "\nCtrl%Approach = " + args.approach
    if args.ret_class:
        driver += "\nCtrl%Class = " + args.ret_class
    if args.no_sea:
        driver += "\nCtrl%Surfaces_To_Skip = ISea"
    elif args.no_land:
        driver += "\nCtrl%Surfaces_To_Skip = ILand"

    # Inversion variable settings for the primary phase
    for var in SETTINGS[args.phase].inv:
        driver += var.driver()

    # Verbatim extra driver lines requested for the main processor
    for part, filename in args.extra_lines:
        if part == "main" and filename != "":
            try:
                with open(filename, "r") as extra:
                    driver += "\n" + extra.read()
            except IOError:
                raise FileMissing('extra_lines_file', filename)

    # Individual key = value overrides for the main section
    for sec, key, val in args.additional:
        if sec == "main":
            driver += f"\n{key} = {val}"

    return driver
def check_args_postproc(args):
    """Ensure postprocessor parser arguments are valid.

    Every entry of ``args.in_dir`` must be an existing directory of
    processed output.  Returns ``args`` for consistency with the other
    ``check_args_*`` functions, whose callers rebind the result.
    """
    for d in args.in_dir:
        if not isdir(d):
            raise FileMissing('Processed output directory', d)

    # Return args so callers may write `args = check_args_postproc(args)`
    return args
def build_preproc_driver(args):
    """Prepare a driver file for the preprocessor.

    Locates the L1B, geolocation, snow/ice, surface reflectance,
    emissivity, ECMWF and ocean-colour files for the granule described by
    args.File, gathers library/version metadata for the output
    attributes, and returns the driver file contents as a string.
    """
    from pyorac.definitions import FileName, BadValue
    from pyorac.util import build_orac_library_path, read_orac_libraries
    from re import search
    from subprocess import CalledProcessError, check_output, STDOUT
    from uuid import uuid4

    # Renamed from 'file' to avoid shadowing the builtin
    l1b = _glob_dirs(args.in_dir, args.File.l1b, 'L1B file')
    geo = _glob_dirs(args.in_dir, args.File.geo, 'geolocation file')

    # Select NISE file
    if args.use_ecmwf_snow or args.no_snow_corr:
        nise = ''
    else:
        for form in ('NISE.004/%Y.%m.%d/NISE_SSMISF17_%Y%m%d.HDFEOS',
                     'NISE.002/%Y.%m.%d/NISE_SSMIF13_%Y%m%d.HDFEOS',
                     '%Y/NISE_SSMIF13_%Y%m%d.HDFEOS',
                     '%Y/NISE_SSMIF17_%Y%m%d.HDFEOS'):
            nise = args.File.time.strftime(os.path.join(args.nise_dir, form))
            if os.path.isfile(nise):
                break
        else:
            raise FileMissing('NISE', nise)

    # Select previous surface reflectance and emissivity files
    if args.swansea:
        alb = _date_back_search(args.swansea_dir, args.File.time,
                                'SW_SFC_PRMS_%m.nc')
        brdf = None
    else:
        alb = _date_back_search(args.mcd43c3_dir, args.File.time,
                                'MCD43C3.A%Y%j.*.hdf')
        brdf = None if args.lambertian else _date_back_search(
            args.mcd43c1_dir, args.File.time, 'MCD43C1.A%Y%j.*.hdf')

    emis = None if args.use_modis_emis else _date_back_search(
        args.emis_dir, args.File.time,
        'global_emis_inf10_monthFilled_MYD11C3.A%Y%j.041.nc')

    # Select ECMWF files
    bounds = _bound_time(args.File.time + args.File.dur // 2)
    if args.ecmwf_flag == 0:
        ggam = _form_bound_filenames(bounds, args.ggam_dir,
                                     'ERA_Interim_an_%Y%m%d_%H+00.nc')
        # BUGFIX: ggas/spam were previously left unbound on this branch,
        # raising UnboundLocalError when the driver was formatted below.
        # All fields live in the single ERA-Interim file, so reuse it
        # (mirroring the ecmwf_flag == 4 branch).
        ggas = ggam
        spam = ggam
    elif args.ecmwf_flag == 1:
        ggam = _form_bound_filenames(bounds, args.ggam_dir,
                                     'ggam%Y%m%d%H%M.nc')
        ggas = _form_bound_filenames(bounds, args.ggas_dir,
                                     'ggas%Y%m%d%H%M.nc')
        spam = _form_bound_filenames(bounds, args.spam_dir,
                                     'gpam%Y%m%d%H%M.nc')
    elif args.ecmwf_flag == 2:
        ggam = _form_bound_filenames(bounds, args.ggam_dir,
                                     'ggam%Y%m%d%H%M.grb')
        ggas = _form_bound_filenames(bounds, args.ggas_dir,
                                     'ggas%Y%m%d%H%M.nc')
        spam = _form_bound_filenames(bounds, args.spam_dir,
                                     'spam%Y%m%d%H%M.grb')
    elif args.ecmwf_flag == 3:
        raise NotImplementedError('Filename syntax for --ecmwf_flag 3 unknown')
    elif args.ecmwf_flag == 4:
        # Try each known filename convention, remembering the last failure
        for form, hr in (('C3D*%m%d%H*.nc', 3),
                         ('ECMWF_OPER_%Y%m%d_%H+00.nc', 6),
                         ('ECMWF_ERA_%Y%m%d_%H+00_0.5.nc', 6)):
            try:
                bounds = _bound_time(args.File.time + args.File.dur // 2,
                                     timedelta(hours=hr))
                ggam = _form_bound_filenames(bounds, args.ggam_dir, form)
                break
            except FileMissing as e:
                err = e
        else:
            raise err
        ggas = ggam
        spam = ggam
    else:
        raise BadValue('ecmwf_flag', args.ecmwf_flag)

    if not args.skip_ecmwf_hr:
        #hr_ecmwf = _form_bound_filenames(bounds, args.hr_dir,
        #                                 'ERA_Interim_an_%Y%m%d_%H+00_HR.grb')
        # These files don't zero-pad the hour for some reason
        bounds = _bound_time(args.File.time + args.File.dur // 2,
                             timedelta(hours=6))
        hr_ecmwf = [
            time.strftime(
                os.path.join(args.hr_dir, 'ERA_Interim_an_%Y%m%d_') +
                '{:d}+00_HR.grb'.format(time.hour * 100)) for time in bounds
        ]
        # (A second, byte-identical rebuild of hr_ecmwf was removed here;
        # the loop below already reports any missing file.)
        for f in hr_ecmwf:
            if not os.path.isfile(f):
                raise FileMissing('HR ECMWF file', f)
    else:
        hr_ecmwf = ['', '']

    occci = args.File.time.strftime(
        os.path.join(
            args.occci_dir, 'ESACCI-OC-L3S-IOP-MERGED-1M_MONTHLY'
            '_4km_GEO_PML_OCx_QAA-%Y%m-fv3.0.nc'))

    #------------------------------------------------------------------------

    if args.uuid:
        uid = str(uuid4())
    else:
        uid = 'n/a'

    # Add NetCDF library to path so following calls work
    libs = read_orac_libraries(args.orac_lib)
    try:
        os.environ["PATH"] = os.path.join(libs["NCDFLIB"][:-4], 'bin:') + \
            os.environ["PATH"]
    except KeyError:
        pass
    os.environ["LD_LIBRARY_PATH"] = build_orac_library_path()

    # Determine current time
    production_time = datetime.now().strftime("%Y%m%d%H%M%S")

    # Determine NCDF version from command line
    try:
        tmp0 = check_output("ncdump", stderr=STDOUT, universal_newlines=True)
    except OSError:
        raise OracError('NetCDF lib improperly built as ncdump not present.')
    m0 = search(r'netcdf library version (.+?) of', tmp0)
    if m0:
        ncdf_version = m0.group(1)
    else:
        ncdf_version = 'n/a'
        warnings.warn('Output formatting of ncdump may have changed.',
                      OracWarning, stacklevel=2)

    # Fetch ECMWF version from header of NCDF file
    try:
        ecmwf_check_file = ggam[0] if ggam[0][-2:] == 'nc' else ggas[0]
        tmp1 = check_output(["ncdump", "-h", ecmwf_check_file],
                            universal_newlines=True)
    except OSError:
        raise FileMissing('ECMWF ggas file', ggas[0])
    m1 = search(r':history = "(.+?)" ;', tmp1)
    if m1:
        ecmwf_version = m1.group(1)
    else:
        ecmwf_version = 'n/a'
        warnings.warn('Header of ECMWF file may have changed.',
                      OracWarning, stacklevel=2)

    # Strip RTTOV version from library definition
    try:
        rttov_lib = glob(os.path.join(libs['RTTOVLIB'], 'librttov?*_main.a'))
    except KeyError:
        rttov_lib = glob(
            os.path.join(libs['CONDA_PREFIX'] + '/lib', 'librttov?*_main.a'))
    for rttov_file in rttov_lib:
        try:
            m2 = search(r'librttov([\d\.]+)_main.a', rttov_file)
            rttov_version = m2.group(1)
            break
        except AttributeError:
            # Was a bare except; search() returned None for this filename
            pass
    else:
        rttov_version = 'n/a'
        warnings.warn('Naming of RTTOV library directory may have changed.',
                      OracWarning, stacklevel=2)

    # Fetch GIT version
    cwd = os.getcwd()
    try:
        os.chdir(os.path.join(args.orac_dir, 'pre_processing'))
        tmp3 = check_output(["git", "--version"], universal_newlines=True)
        m3 = search('git version (.+?)\n', tmp3)
        git_version = m3.group(1)
    except (OSError, CalledProcessError, AttributeError):
        # Was a bare except; covers missing git, a failed call, and an
        # unparseable version string
        git_version = 'n/a'
        warnings.warn('Unable to call git.', OracWarning, stacklevel=2)
    finally:
        os.chdir(cwd)

    # Fetch repository commit number
    if not args.revision:
        args.revision = get_repository_revision()
    file_version = 'R{}'.format(args.revision)

    #------------------------------------------------------------------------

    # Write driver file
    driver = """{sensor}
{l1b}
{geo}
{usgs}
{ggam[0]}
{coef}
{atlas}
{nise}
{alb}
{brdf}
{emis}
{dellon}
{dellat}
{out_dir}
{limit[0]}
{limit[1]}
{limit[2]}
{limit[3]}
{ncdf_version}
{conventions}
{institution}
{l2_processor}
{creator_email}
{creator_url}
{file_version}
{references}
{history}
{summary}
{keywords}
{comment}
{project}
{license}
{uuid}
{production_time}
{atsr_calib}
{ecmwf_flag}
{ggas[0]}
{spam[0]}
{chunk_flag}
{day_flag}
{verbose}
-
{assume_full_paths}
{include_full_brdf}
{rttov_version}
{ecmwf_version}
{git_version}
ECMWF_TIME_INT_METHOD={ecmwf_int_method}
ECMWF_PATH_2={ggam[1]}
ECMWF_PATH2_2={ggas[1]}
ECMWF_PATH3_2={spam[1]}
USE_HR_ECMWF={use_ecmwf_hr}
ECMWF_PATH_HR={ecmwf_hr[0]}
ECMWF_PATH_HR_2={ecmwf_hr[1]}
USE_ECMWF_SNOW_AND_ICE={ecmwf_nise}
USE_MODIS_EMIS_IN_RTTOV={modis_emis}
ECMWF_NLEVELS={ecmwf_nlevels}
USE_L1_LAND_MASK={l1_land_mask}
USE_OCCCI={use_occci}
OCCCI_PATH={occci_file}
DISABLE_SNOW_ICE_CORR={no_snow}
DO_CLOUD_EMIS={cld_emis}
DO_IRONLY={ir_only}
DO_CLDTYPE={cldtype}
USE_CAMEL_EMIS={camel}
USE_SWANSEA_CLIMATOLOGY={swansea}""".format(
        alb=alb,
        assume_full_paths=True,  # Above file searching returns paths not dirs
        atlas=args.atlas_dir,
        atsr_calib=args.calib_file,
        brdf=brdf,
        camel=args.camel_emis,
        chunk_flag=False,  # File chunking no longer required
        cldtype=not args.skip_cloud_type,
        cld_emis=args.cloud_emis,
        coef=args.coef_dir,
        comment=args.comments,
        conventions=args.cfconvention,
        creator_email=args.email,
        creator_url=args.url,
        day_flag=args.day_flag,  # 0=1=Day, 2=Night
        dellat=args.dellat,
        dellon=args.dellon,
        ecmwf_flag=args.ecmwf_flag,
        ecmwf_hr=hr_ecmwf,
        ecmwf_int_method=args.single_ecmwf,
        ecmwf_nise=args.use_ecmwf_snow,
        ecmwf_nlevels=args.ecmwf_nlevels,
        ecmwf_version=ecmwf_version,
        emis=emis,
        file_version=file_version,
        geo=geo,
        ggam=ggam,
        ggas=ggas,
        history=args.history,
        include_full_brdf=not args.lambertian,
        institution=args.institute,
        ir_only=args.ir_only,
        keywords=args.keywords,
        l1_land_mask=args.l1_land_mask,
        l1b=l1b,
        l2_processor=args.processor,
        license=args.license,
        limit=args.limit,
        modis_emis=args.use_modis_emis,
        ncdf_version=ncdf_version,
        nise=nise,
        no_snow=args.no_snow_corr,
        occci_file=occci,
        out_dir=args.out_dir,
        usgs=args.usgs_file,
        production_time=production_time,
        project=args.project,
        references=args.references,
        rttov_version=rttov_version,
        sensor=args.File.sensor,
        spam=spam,
        summary=args.summary,
        swansea=args.swansea,
        git_version=git_version,
        uuid=uid,
        use_ecmwf_hr=not args.skip_ecmwf_hr,
        use_occci=args.use_oc,
        verbose=args.verbose,
    )

    if args.available_channels is not None:
        driver += "\nN_CHANNELS={}".format(len(args.available_channels))
        driver += "\nCHANNEL_IDS={}".format(','.join(
            str(k) for k in args.available_channels))

    # Verbatim extra driver lines requested for the preprocessor
    for part, f in args.extra_lines:
        if part == "pre" and f != "":
            try:
                with open(f, "r") as e:
                    driver += "\n" + e.read()
            except IOError:
                raise FileMissing('extra_lines_file', f)

    # Individual key=value overrides for the preprocessor section
    for sec, key, val in args.additional:
        if sec == "pre":
            driver += "\n{}={}".format(key, val)

    # Predefined geostationary land/sea mask and geolocation
    if args.File.predef and not args.no_predef:
        driver += """
USE_PREDEF_LSM=True
EXT_LSM_PATH={lsm}
USE_PREDEF_GEO=True
EXT_GEO_PATH={geo}""".format(lsm=args.prelsm_file, geo=args.pregeo_file)

    if args.product_name is not None:
        driver += "\nPRODUCT_NAME={}".format(args.product_name)

    return driver
def check_args_preproc(args):
    """Ensure preprocessor parser arguments are valid.

    Merges the global attributes and auxiliary file locations from
    local_defaults into ``args``, sanity-checks the --limit values,
    copies file metadata onto the FileName object, and verifies the
    RTTOV/EMOS directories exist.  Returns ``args``.
    """
    from pyorac.local_defaults import auxiliaries, global_attributes

    # Add global attributes
    global_attributes.update({key: val for key, val in args.global_att})
    args.__dict__.update(global_attributes)

    # Insert auxilliary locations
    auxiliaries.update({key: val for key, val in args.aux})
    args.__dict__.update(auxiliaries)

    try:
        # When using ecmwf_dir to set a single directory
        if os.path.isdir(args.ecmwf_dir):
            args.ggam_dir = args.ecmwf_dir
            args.ggas_dir = args.ecmwf_dir
            args.spam_dir = args.ecmwf_dir
    except AttributeError:
        # No single ECMWF directory given; keep the individual directories
        pass

    # Limit should either be all zero or all non-zero.
    limit_check = args.limit[0] == 0
    for limit_element in args.limit[1:]:
        if (limit_element == 0) ^ limit_check:
            warnings.warn('All elements of --limit should be non-zero.',
                          OracWarning, stacklevel=2)

    # Update FileName class, without overwriting values parsed from the
    # filename itself
    if args.revision is not None:
        args.File.revision = args.revision
    if "processor" not in args.File.__dict__:
        args.File.processor = args.processor
    if "project" not in args.File.__dict__:
        args.File.project = args.project
    if "product_name" not in args.File.__dict__:
        args.File.product_name = args.product_name

    if args.File.predef and args.l1_land_mask and not args.no_predef:
        raise ValueError("Do not set --l1_land_mask while using predefined "
                         "geostationary geolocation.")

    if not os.path.isdir(args.atlas_dir):
        raise FileMissing('RTTOV Atlas directory', args.atlas_dir)
    # if not os.path.isfile(args.calib_file):
    #     raise FileMissing('AATSR calibration file', args.calib_file)
    if not os.path.isdir(args.coef_dir):
        raise FileMissing('RTTOV coefficients directory', args.coef_dir)
    if not os.path.isdir(args.emis_dir):
        raise FileMissing('RTTOV emissivity directory', args.emis_dir)
    if not os.path.isdir(args.emos_dir):
        raise FileMissing('EMOS temporary directory', args.emos_dir)
    # NOTE(review): the following checks are deliberately disabled —
    # presumably because these inputs are optional for some configurations.
    # if not os.path.isdir(args.ggam_dir):
    #     raise FileMissing('ECMWF GGAM directory', args.ggam_dir)
    # if not os.path.isdir(args.ggas_dir):
    #     raise FileMissing('ECMWF GGAS directory', args.ggas_dir)
    # if not os.path.isdir(args.hr_dir) and not args.skip_ecmwf_hr:
    #     raise FileMissing('ECMWF high resolution directory', args.hr_dir)
    # if not os.path.isdir(args.mcd43c3_dir):
    #     raise FileMissing('MODIS MCD43C1 directory', args.mcd43c1_dir)
    # if not os.path.isdir(args.mcd43c1_dir):
    #     raise FileMissing('MODIS MCD43C3 directory', args.mcd43c3_dir)
    # if not os.path.isdir(args.occci_dir):
    #     raise FileMissing('OC CCI directory', args.occci_dir)
    # if not os.path.isdir(args.nise_dir):
    #     raise FileMissing('NISE directory', args.nise_dir)
    # if not os.path.isdir(args.spam_dir):
    #     raise FileMissing('ECMWF SPAM directory', args.spam_dir)
    # if not os.path.isfile(args.usgs_file):
    #     raise FileMissing('USGS file', args.usgs_file)

    return args
def build_preproc_driver(args):
    """Prepare a driver file for the preprocessor.

    Locates every ancillary input (NISE snow, surface reflectance/BRDF,
    emissivity, ECMWF profiles, ocean colour), gathers version metadata
    (netCDF, ECMWF, RTTOV, git), and returns the preprocessor driver text.

    Raises FileMissing when a required input cannot be found, OracError when
    ncdump is absent or broken, NotImplementedError for unsupported nwp_flag
    values and BadValue for unknown ones.
    """
    from itertools import product
    from re import search
    from subprocess import CalledProcessError, check_output, STDOUT
    from uuid import uuid4
    from pyorac.definitions import BadValue
    from pyorac.util import (build_orac_library_path, extract_orac_libraries,
                             read_orac_library_file)

    l1b = _glob_dirs(args.in_dir, args.File.l1b, 'L1B file')
    geo = _glob_dirs(args.in_dir, args.File.geo, 'geolocation file')

    # Select NISE file
    if args.use_ecmwf_snow or args.no_snow_corr:
        nise = ''
    else:
        # There are usually too many files in this directory to glob quickly.
        # Instead, guess where it is. If your search is failing here, but the
        # appropriate file is present, you need to add a format to one of
        # these loops that finds your file.
        nise_locations = (
            'NISE.005/%Y.%m.%d', 'NISE.004/%Y.%m.%d',
            'NISE.002/%Y.%m.%d', 'NISE.001/%Y.%m.%d',
            '%Y', '%Y.%m.%d', '%Y_%m_d', '%Y-%m-%d', ''
        )
        nise_formats = (
            'NISE_SSMISF18_%Y%m%d.HDFEOS',
            'NISE_SSMISF17_%Y%m%d.HDFEOS',
            'NISE_SSMIF13_%Y%m%d.HDFEOS',
        )
        for nise_location, nise_format in product(nise_locations,
                                                  nise_formats):
            nise = args.File.time.strftime(os.path.join(
                args.nise_dir, nise_location, nise_format
            ))
            if os.path.isfile(nise):
                break
        else:
            raise FileMissing('NISE', args.nise_dir)

    # Select previous surface reflectance and emissivity files
    if args.swansea:
        alb = _date_back_search(args.swansea_dir, args.File.time,
                                'SW_SFC_PRMS_%m.nc', 'years')
        brdf = None
    else:
        # Try MODIS collections 6.1, 6 and 5 in turn.
        for ver in (61, 6, 5):
            try:
                alb = _date_back_search(
                    args.mcd43c3_dir, args.File.time,
                    f'MCD43C3.A%Y%j.{ver:03d}.*.hdf', 'days'
                )
                brdf = None if args.lambertian else _date_back_search(
                    args.mcd43c1_dir, args.File.time,
                    f'MCD43C1.A%Y%j.{ver:03d}.*.hdf', 'days'
                )
                break
            except FileMissing:
                pass
        else:
            raise FileMissing('MODIS albedo', args.mcd43c3_dir)

    if args.use_modis_emis:
        emis = None
    elif args.use_camel_emis:
        emis = _date_back_search(
            args.camel_dir, args.File.time,
            'CAM5K30EM_emis_%Y%m_V???.nc', 'years'
        )
    else:
        emis = _date_back_search(
            args.emis_dir, args.File.time,
            'global_emis_inf10_monthFilled_MYD11C3.A%Y%j.*nc', 'days'
        )

    # Select ECMWF files; bound the granule's mid-time by forecast steps.
    bounds = _bound_time(args.File.time + args.File.dur // 2)
    if args.nwp_flag == 0:
        ecmwf_nlevels = 91
        raise NotImplementedError('Filename syntax for --nwp_flag 0 unknown')
    elif args.nwp_flag == 4:
        ecmwf_nlevels = 60
        ggam = _form_bound_filenames(bounds, args.ggam_dir,
                                     'ggam%Y%m%d%H%M.grb')
        ggas = _form_bound_filenames(bounds, args.ggas_dir,
                                     'ggas%Y%m%d%H%M.nc')
        spam = _form_bound_filenames(bounds, args.spam_dir,
                                     'spam%Y%m%d%H%M.grb')
    elif args.nwp_flag == 3:
        ecmwf_nlevels = 60
        raise NotImplementedError('Filename syntax for --nwp_flag 3 unknown')
    elif args.nwp_flag == 1:
        ecmwf_nlevels = 137
        # Try each known single-file naming convention until one matches.
        for form, ec_hour in (('C3D*%m%d%H*.nc', 3),
                              ('ECMWF_OPER_%Y%m%d_%H+00.nc', 6),
                              ('ECMWF_ERA5_%Y%m%d_%H_0.5.nc', 6),
                              ('ECMWF_ERA_%Y%m%d_%H_0.5.nc', 6),
                              ('ECMWF_ERA_%Y%m%d_%H+00_0.5.nc', 6)):
            try:
                bounds = _bound_time(args.File.time + args.File.dur // 2,
                                     ec_hour)
                ggam = _form_bound_filenames(bounds, args.ecmwf_dir, form)
                break
            except FileMissing as tmp_err:
                err = tmp_err
        else:
            # Re-raise the last failure if no convention matched.
            raise err
        ggas = ["", ""]
        spam = ["", ""]
    elif args.nwp_flag == 2:
        ecmwf_nlevels = 137
        # Interpolation is done in the code
        ggam = [args.ecmwf_dir, args.ecmwf_dir]
        ggas = ["", ""]
        spam = ["", ""]
    else:
        raise BadValue('nwp_flag', args.nwp_flag)

    if args.use_oc:
        # Try each known Ocean Colour CCI version, newest first.
        for oc_version in (5.0, 4.2, 4.1, 4.0, 3.1, 3.0, 2.0, 1.0):
            occci = args.File.time.strftime(os.path.join(
                args.occci_dir, 'ESACCI-OC-L3S-IOP-MERGED-1M_MONTHLY'
                f'_4km_GEO_PML_OCx_QAA-%Y%m-fv{oc_version:.1f}.nc'
            ))
            if os.path.isfile(occci):
                break
        else:
            raise FileMissing('Ocean Colour CCI', occci)
    else:
        occci = ''

    # ------------------------------------------------------------------------

    if args.uuid:
        uid = str(uuid4())
    else:
        uid = 'n/a'

    libs = read_orac_library_file(args.orac_lib)
    lib_list = extract_orac_libraries(libs)
    os.environ["LD_LIBRARY_PATH"] = build_orac_library_path(lib_list=lib_list)

    # Determine current time
    production_time = datetime.now().strftime("%Y%m%d%H%M%S")

    # Determine NCDF version from command line
    for fdr in lib_list:
        ncdf_exe = os.path.join(fdr, "..", "bin", "ncdump")
        try:
            tmp0 = check_output(ncdf_exe, stderr=STDOUT,
                                universal_newlines=True)
        except FileNotFoundError:
            continue
        except CalledProcessError:
            raise OracError('ncdump is non-functional.')

        mat0 = search(r'netcdf library version (.+?) of', tmp0)
        if mat0:
            ncdf_version = mat0.group(1)
        else:
            ncdf_version = 'n/a'
            warnings.warn('Output formatting of ncdump may have changed.',
                          OracWarning, stacklevel=2)
        break
    else:
        raise OracError('NetCDF lib improperly built as ncdump not present. '
                        'LD_LIBRARY_PATH=' + os.environ["LD_LIBRARY_PATH"])

    # Fetch ECMWF version from header of NCDF file
    if 3 <= args.nwp_flag <= 4:
        # Probe whichever of the two inputs is netCDF-formatted.
        ecmwf_check_file = ggam[0] if ggam[0].endswith('nc') else ggas[0]
        try:
            tmp1 = check_output([ncdf_exe, "-h", ecmwf_check_file],
                                universal_newlines=True)
        except OSError:
            # Report the file actually probed (previously this always named
            # ggas[0] even when ggam[0] was the file that failed).
            raise FileMissing('ECMWF file', ecmwf_check_file)
        mat1 = search(r':history = "(.+?)" ;', tmp1)
        if mat1:
            ecmwf_version = mat1.group(1)
        else:
            ecmwf_version = 'n/a'
            warnings.warn('Header of ECMWF file may have changed.',
                          OracWarning, stacklevel=2)
    elif args.nwp_flag == 2:
        ecmwf_version = 'ERA5'
    else:
        # TODO: Fetch version information from GFS files
        ecmwf_version = 'n/a'

    # RTTOV version number from small executable
    try:
        rttov_version_exe = os.path.join(args.orac_dir, "common",
                                         "rttov_version")
        if not os.path.isfile(rttov_version_exe):
            rttov_version_exe = os.path.join(args.orac_dir, "rttov_version")
        rttov_version = check_output(
            rttov_version_exe, universal_newlines=True
        ).strip()
    except (OSError, CalledProcessError):
        # OSError covers the executable being absent at both candidate paths
        # (previously an uncaught FileNotFoundError); fall back to 'n/a' as
        # the other version probes do.
        rttov_version = 'n/a'
        warnings.warn('RTTOV library version number unavailable.',
                      OracWarning, stacklevel=2)

    # Fetch GIT version
    cwd = os.getcwd()
    try:
        os.chdir(os.path.join(args.orac_dir, 'pre_processing'))
        tmp3 = check_output(["git", "--version"], universal_newlines=True)
        mat3 = search('git version (.+?)\n', tmp3)
        git_version = mat3.group(1)
    except (FileNotFoundError, CalledProcessError, AttributeError):
        git_version = 'n/a'
        warnings.warn('Unable to call git.', OracWarning, stacklevel=2)
    finally:
        os.chdir(cwd)

    file_version = f'R{args.File.revision}'
    chunk_flag = False  # File chunking no longer required
    assume_full_paths = True  # We pass absolute paths
    cldtype = not args.skip_cloud_type
    include_full_brdf = not args.lambertian

    # ------------------------------------------------------------------------

    # Write driver file
    driver = f"""{args.File.sensor}
{l1b}
{geo}
{args.usgs_file}
{ggam[0]}
{args.coef_dir}
{args.atlas_dir}
{nise}
{alb}
{brdf}
{emis}
{args.dellon}
{args.dellat}
{args.out_dir}
{args.limit[0]}
{args.limit[1]}
{args.limit[2]}
{args.limit[3]}
{ncdf_version}
{args.cfconvention}
{args.institute}
{args.processor}
{args.email}
{args.url}
{file_version}
{args.references}
{args.history}
{args.summary}
{args.keywords}
{args.comments}
{args.project}
{args.license}
{uid}
{production_time}
{args.calib_file}
{args.nwp_flag}
{ggas[0]}
{spam[0]}
{chunk_flag}
{args.day_flag}
{args.verbose}
-
{assume_full_paths}
{include_full_brdf}
{rttov_version}
{ecmwf_version}
{git_version}
ECMWF_TIME_INT_METHOD={args.single_ecmwf}
ECMWF_PATH_2={ggam[1]}
ECMWF_PATH2_2={ggas[1]}
ECMWF_PATH3_2={spam[1]}
USE_ECMWF_SNOW_AND_ICE={args.use_ecmwf_snow}
USE_MODIS_EMIS_IN_RTTOV={args.use_modis_emis}
ECMWF_NLEVELS={ecmwf_nlevels}
USE_L1_LAND_MASK={args.l1_land_mask}
USE_OCCCI={args.use_oc}
OCCCI_PATH={occci}
DISABLE_SNOW_ICE_CORR={args.no_snow_corr}
DO_CLOUD_EMIS={args.cloud_emis}
DO_IRONLY={args.ir_only}
DO_CLDTYPE={cldtype}
USE_CAMEL_EMIS={args.use_camel_emis}
USE_SWANSEA_CLIMATOLOGY={args.swansea}"""

    if args.available_channels is not None:
        driver += "\nN_CHANNELS={}".format(len(args.available_channels))
        driver += "\nCHANNEL_IDS={}".format(
            ','.join(str(k) for k in args.available_channels)
        )

    for part, filename in args.extra_lines:
        if part == "pre" and filename != "":
            try:
                with open(filename, "r") as extra:
                    driver += "\n" + extra.read()
            except IOError:
                raise FileMissing('extra_lines_file', filename)

    for sec, key, val in args.additional:
        if sec == "pre":
            driver += f"\n{key}={val}"

    if args.File.predef and not args.no_predef:
        driver += f"""
USE_PREDEF_LSM=False
EXT_LSM_PATH={args.prelsm_file}
USE_PREDEF_GEO=False
EXT_GEO_PATH={args.pregeo_file}"""

    if args.product_name is not None:
        driver += f"\nPRODUCT_NAME={args.product_name}"

    return driver