def main():
    '''
    Main program: parse the command line, configure the run and process all variables.

    Steps, in order:
      1. read the command line and load the .ini configuration,
      2. copy command line overrides into the configuration,
      3. read the variables table (settings.init) and determine the variables
         and output resolutions to process,
      4. create the logger(s) and the working/output directories,
      5. chunk or process every requested variable.

    Raises
    ------
    Exception
        If no variable is selected, if a start/end variable given together
        with --all cannot be found, or if --all is combined with more than
        two variables.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument("-X", "--EXP", action="store", dest="exp", default="",
                        help="Driving experiment (e.g. historical or rcp85)")
    parser.add_argument("-G", "--GCM", action="store", dest="gcm", default="",
                        help="Driving GCM")
    parser.add_argument("-E", "--ENS", action="store", dest="ens", default="",
                        help="Ensemble member of the driving GCM")
    parser.add_argument(
        "-r", "--resolution", action="store", dest="reslist", default="",
        help="list of desired output resolutions, comma-separated (supported: "
             "1hr (1-hourly), 3hr (3-hourly),6hr (6-hourly),day (daily),"
             "mon (monthly) ,sem (seasonal),fx (for time invariant variables)")
    parser.add_argument(
        "-v", "--varlist", action="store", dest="varlist", default="",
        help="comma-separated list of variables (RCM or CORDEX name) to be "
             "processed. If combined with --all: start with variable and "
             "process all remaining variables from the alphabetic order "
             "(ordered by RCM name) [until end variable, if given].")
    parser.add_argument("-a", "--all", action="store_true", dest="all_vars",
                        default=False, help="process all available variables")
    parser.add_argument("-O", "--overwrite", action="store_true",
                        dest="overwrite", default=False,
                        help="Overwrite existent output files")
    parser.add_argument(
        "-M", "--multi", action="store", dest="multi", default=1,
        help="Use multiprocessing and specify number of available cores.")
    parser.add_argument("-c", "--chunk-var", action="store_true",
                        dest="chunk_var", default=False,
                        help="Concatenate files to chunks")
    parser.add_argument("--remove", action="store_true", dest="remove_src",
                        default=False,
                        help="Remove source files after chunking")
    parser.add_argument(
        "-s", "--start", action="store", dest="proc_start", default="",
        help="Start year (and start month if not January) for processing. "
             "Format: YYYY[MM] ")
    parser.add_argument(
        "-e", "--end", action="store", dest="proc_end", default="",
        help="End year (and end month if not December) for processing. "
             "Format: YYYY[MM]")
    parser.add_argument("-P", "--propagate", action="store_true",
                        dest="propagate", default=False,
                        help="Propagate log to standard output.")
    parser.add_argument(
        "-S", "--silent", action="store_false", dest="normal_log",
        default=True,
        help="Write only minimal information to log (variables and "
             "resolutions in progress, warnings and errors)")
    parser.add_argument("-V", "--verbose", action="store_true",
                        dest="verbose_log", default=False,
                        help="Verbose logging for debugging")
    parser.add_argument("-A", "--append_log", action="store_true",
                        dest="append_log", default=False,
                        help="Append to log instead of overwrite")
    parser.add_argument(
        "-f", "--force_proc", action="store_false", dest="process_table_only",
        default=True,
        help="Try to process variable at specific resolution regardless of "
             "what is written in the variables table")
    parser.add_argument("-n", "--use-version", action="store",
                        dest="use_version",
                        default=tools.new_dataset_version(),
                        help="version to be added to directory structure")
    parser.add_argument("-i", "--ini", action="store", dest="inifile",
                        default="control_cmor.ini",
                        help="configuration file (.ini)")
    parser.add_argument("-d", "--no_derotate", action="store_false",
                        dest="derotate_uv", default=True,
                        help="no derotation of u and v variables")
    parser.add_argument("-m", "--simulation", action="store",
                        dest="simulation", default='',
                        help="which simulation specific settings to choose")

    options = parser.parse_args()

    config.load_configuration(options.inifile)
    if options.simulation != "":
        config.set_config_value('settings', 'simulation', options.simulation)

    # the time range is only limited if both start and end are given
    limit_range = options.proc_start != "" and options.proc_end != ""

    # store parsed arguments in config
    if options.proc_start != "":
        config.set_config_value('integer', "proc_start",
                                options.proc_start[:4])
        if len(options.proc_start) == 6:
            config.set_config_value('integer', "first_month",
                                    options.proc_start[4:])
        else:
            config.set_config_value('integer', "first_month", "1")
    if options.proc_end != "":
        config.set_config_value('integer', "proc_end", options.proc_end[:4])
        if len(options.proc_end) == 6:
            config.set_config_value('integer', "last_month",
                                    options.proc_end[4:])
        else:
            config.set_config_value('integer', "last_month", "12")
    if options.varlist != "":
        config.set_config_value('settings', 'varlist', options.varlist)

    # driving model / ensemble / experiment overrides
    change_driv_exp = False
    if options.gcm != "":
        config.set_model_value('driving_model_id', options.gcm)
        change_driv_exp = True
    if options.ens != "":
        config.set_model_value('driving_model_ensemble_member', options.ens)
        change_driv_exp = True
    if options.exp != "":
        config.set_model_value('driving_experiment_name', options.exp)
        config.set_model_value('experiment_id', options.exp)
        change_driv_exp = True
    if change_driv_exp:
        config.set_model_value(
            'driving_experiment',
            "%s, %s, %s" % (config.get_sim_value('driving_model_id'),
                            config.get_sim_value('experiment_id'),
                            config.get_sim_value(
                                'driving_model_ensemble_member')))

    config.set_config_value('boolean', 'overwrite', options.overwrite)
    config.set_config_value('boolean', 'limit_range', limit_range)
    config.set_config_value('boolean', 'remove_src', options.remove_src)
    config.set_config_value('integer', 'multi', options.multi)
    config.set_config_value('boolean', 'derotate_uv', options.derotate_uv)
    config.set_config_value('boolean', 'propagate_log', options.propagate)

    # extend input paths by "<driving model>/<driving experiment>" if requested
    if config.get_config_value('boolean', 'extend_DirIn'):
        path_suffix = ('/' + config.get_sim_value('driving_model_id') + '/' +
                       config.get_sim_value('driving_experiment_name'))
        config.set_config_value(
            'settings', 'DirIn',
            config.get_config_value('settings', 'DirIn') + path_suffix)
        config.set_config_value(
            'settings', 'DirDerotated',
            config.get_config_value('settings', 'DirDerotated') + path_suffix)

    # now read vartable for all variables for this RCM
    vartable = config.get_sim_value('vartable')
    settings.init(vartable)

    # NOTE: no logger exists yet at this point, so problems with the variable
    # selection are reported through the raised exception only.
    varlist = settings.varlist
    if not options.all_vars:
        if varlist == [] or varlist == ['']:
            raise Exception(
                "No variables set for processing! Set with -v or -a or in configuration file."
            )
    else:
        varlist_all = list(tools.get_var_lists())
        if len(varlist) in (1, 2) and varlist != ['']:
            index_var = config.get_config_value('index', 'INDEX_VAR')
            # start from given variable
            try:
                varstart = settings.param[varlist[0]][index_var]
                first = varlist_all.index(varstart)
            except (KeyError, IndexError, ValueError):
                raise Exception(
                    "Variable %s not found in list of all variables! Cannot start processing from this variable"
                    % varlist[0])
            if len(varlist) == 2:
                # stop at given end variable (inclusive)
                try:
                    varend = settings.param[varlist[1]][index_var]
                    last = varlist_all.index(varend)
                except (KeyError, IndexError, ValueError):
                    raise Exception(
                        "Variable %s not found in list of all variables! Cannot stop processing at this variable"
                        % varlist[1])
                varlist = varlist_all[first:last + 1]
            else:
                varlist = varlist_all[first:]
        elif len(varlist) > 2:
            raise Exception(
                "Option --all contradicts giving a variable list with more than two variables!"
            )
        else:
            varlist = varlist_all

    if options.reslist == "":
        # output resolutions not given on command line -> take from inifile;
        # spaces are accepted as delimiter, empty entries are dropped
        reslist = config.get_config_value('settings', 'reslist').replace(
            " ", ",")
        reslist = list(filter(None, reslist.split(',')))
    else:
        reslist = options.reslist.split(',')

    # The log file is defined after the variable list is known because its
    # name contains the first variable of the list.
    LOG_BASE = settings.DirLog
    if not os.path.isdir(LOG_BASE):
        print("Create logging directory: %s" % LOG_BASE)
        os.makedirs(LOG_BASE)
    LOG_FILENAME = os.path.join(LOG_BASE, 'CMORlight.') + config.get_sim_value(
        'driving_model_id') + "_" + config.get_sim_value(
            'experiment_id') + "_" + varlist[0] + "."
    logext = datetime.datetime.now().strftime("%d-%m-%Y") + '.log'
    propagate_log = config.get_config_value('boolean', 'propagate_log')

    # get logger and assign logging filename (many loggers for multiprocessing)
    if limit_range and int(options.multi) > 1:
        # create one logger for each processing year
        for y in range(config.get_config_value("integer", "proc_start"),
                       config.get_config_value("integer", "proc_end") + 1):
            init_log.setup_custom_logger(
                "cmorlight_" + str(y), LOG_FILENAME + str(y) + '.' + logext,
                propagate_log, options.normal_log, options.verbose_log,
                options.append_log)
        # the general log file name carries the processed year range
        LOG_FILENAME += "%s_%s." % (
            config.get_config_value("integer", "proc_start"),
            config.get_config_value("integer", "proc_end"))

    log = init_log.setup_custom_logger(
        "cmorlight", LOG_FILENAME + logext, propagate_log, options.normal_log,
        options.verbose_log, options.append_log)

    # from here on messages can be written to the log file
    if not limit_range and int(options.multi) > 1:
        msg = ("To use multiprocessing you have to limit the time range by specifying this range over the command line (-s START, -e END)! Exiting..."
               )
        print(msg)
        log.error(msg)
        sys.exit()

    # create working and output directory if not existent
    if not os.path.isdir(settings.DirWork):
        log.debug("Working directory does not exist, creating: %s" %
                  (settings.DirWork))
        os.makedirs(settings.DirWork)
    if not os.path.isdir(settings.DirOut):
        log.debug("Output directory does not exist, creating: %s" %
                  (settings.DirOut))
        os.makedirs(settings.DirOut)

    if config.get_config_value('boolean', 'add_version_to_outpath'):
        settings.use_version = options.use_version

    log.info("Configuration read from: %s" % os.path.abspath(vartable))
    log.info("Variable(s): %s " % varlist)
    log.info("Requested time output resolution(s): %s " % reslist)
    if options.process_table_only:
        log.info(
            "For each variable processing only resolutions declared in parameter table"
        )
    else:
        log.info(
            "Processing all resolutions lower equal the input data resolution")

    # needed for progress bar
    nfiles = None  # estimated total number of files
    currfile = 0  # currently processed files

    index_var = config.get_config_value('index', 'INDEX_VAR')
    index_rcm = config.get_config_value('index', 'INDEX_RCM_NAME')

    for var in varlist:
        if var not in settings.param:
            log.warning("Variable '%s' not supported!" % (var))
            continue
        params = settings.param[var]
        varCMOR = params[index_var]
        varRCM = params[index_rcm]
        if (varCMOR in settings.var_skip_list) or (varRCM
                                                   in settings.var_skip_list):
            log.debug(
                "###########################################################")
            log.debug(
                "Variable was found in var_skip_list. Skip this variables")
            continue
        log.log(
            35,
            "\n\n\n###########################################################\n# Var in work: %s / %s\n###########################################################"
            % (varCMOR, varRCM))

        # set global attributes in the dictionary
        tools.set_attributes(params)

        # fixed fields are never chunked; they go through normal processing
        if options.chunk_var and var not in settings.var_list_fixed:
            log.log(35, "Chunking files \n #######################")
            tools.proc_chunking(params, reslist)
            continue

        # per-variable copy: drop resolutions that are not in the table or
        # not supported for this variable
        reslist_act = list(reslist)
        if (varRCM not in settings.var_list_fixed) and (
                varCMOR not in settings.var_list_fixed):
            for res in reslist:
                if tools.check_resolution(
                        params, res, options.process_table_only) == False:
                    reslist_act.remove(res)
        if reslist_act == []:
            log.warning(
                "None of the given resolutions appears in the table! Skipping variable..."
            )
            continue

        # process the variable at all remaining output resolutions
        nfiles, currfile = process_resolution(params, reslist_act,
                                              len(varlist), nfiles, currfile)
def _run_file_batch(multilst, seaslst, seasonal, processes, cores):
    ''' Process one batch of yearly files (plus seasonal means if requested) in worker pools and return the per-file results. '''
    pool = Pool(processes=processes)
    try:
        results = pool.map(process_file_unpack, multilst)
    finally:
        pool.terminate()
    if seasonal:
        pool = Pool(processes=cores)
        try:
            pool.map(proc_seasonal_unpack, seaslst)
        finally:
            pool.terminate()
    return results


def process_resolution(params, reslist, nvar, nfiles, currfile):
    '''
    Processes files for variable defined by params for all resolutions in reslist

    Parameters
    ----------

    params : list
        Row in variables table corresponding to variable processed in the call of this function
    reslist : list
        List of resolutions the variable should be processed at (not modified)
    nvar : int
        Number of variables (needed for progress bar)
    nfiles : int
        Currently estimated total number of files for all variables (needed for progress bar)
    currfile : int
        Currently already processed number of files

    Returns
    -------

    nfiles : int
        Estimated total number of files (estimated here if not yet set)
    currfile : int
        Updated number of processed files
    '''
    # work on a copy so that removing "sem" does not alter the caller's list
    reslist = list(reslist)
    seasonal = "sem" in reslist
    if seasonal:
        reslist.remove("sem")

    log = logging.getLogger("cmorlight")

    # get cdf variable name
    var = params[config.get_config_value('index', 'INDEX_VAR')]
    varRCM = params[config.get_config_value('index', 'INDEX_RCM_NAME')]
    is_fixed = var in settings.var_list_fixed

    # create path to input files from basedir,model,driving_model
    in_dir = "%s/%s" % (tools.get_input_path(), varRCM)
    log.debug("Looking for input dir(1): %s" % (in_dir))
    if not os.path.isdir(in_dir):
        log.error(
            "Input directory does not exist(0): %s \n \t Change base path in .ini file or create directory! "
            % in_dir)
        return nfiles, currfile

    cores = config.get_config_value("integer", "multi", exitprog=False)
    limit_range = config.get_config_value('boolean', 'limit_range')
    if limit_range:
        proc_start = config.get_config_value('integer', 'proc_start')
        proc_end = config.get_config_value('integer', 'proc_end')

    # accepted file name beginnings for this variable
    # NOTE(review): the last entry cuts the RCM name at its first 'p'
    # (presumably for pressure-level variables); if the name contains no 'p'
    # the last character is dropped instead - confirm this is intended.
    prefixes = ("%s_" % var, "%s.nc" % var, "%s_" % varRCM,
                "%s.nc" % varRCM, "%s_" % varRCM[:varRCM.find('p')])

    multilst = []
    seaslst = []
    log.info("Used dir: %s" % (in_dir))
    for dirpath, dirnames, filenames in os.walk(in_dir, followlinks=True):
        if not nfiles:
            # estimate total number of files for the progress bar
            if limit_range:
                nfiles = nvar * (proc_end - proc_start + 1)
            else:
                nfiles = nvar * len(filenames)

        if len(filenames) == 0:
            log.warning("No files found! Skipping this variable...")

        for f in sorted(filenames):
            if f[-3:] != ".nc":
                continue
            if not is_fixed:
                # the year is taken from the last "_"-separated name part
                year = f.split("_")[-1][:4]

            # with multiprocessing every year writes to its own logger
            if cores > 1 and not is_fixed:
                logger = logging.getLogger("cmorlight_" + year)
                logger.info(
                    "\n###########################################################\n# Var in work: %s / %s\n###########################################################"
                    % (var, varRCM))
                logger.info("Start processing at: " +
                            str(datetime.datetime.now()))
            else:
                logger = logging.getLogger("cmorlight")

            # if limit_range is set: skip file if it is out of range
            firstlast = [1, 12]
            if limit_range and not is_fixed:
                file_year = int(year)
                if file_year < proc_start or file_year > proc_end:
                    continue
                # First and last month are resolved independently so that a
                # range within a single year honours both limits.
                if file_year == proc_start:
                    firstlast[0] = config.get_config_value(
                        'integer', "first_month")
                if file_year == proc_end:
                    firstlast[1] = config.get_config_value(
                        'integer', "last_month")

            logger.info(
                "\n###########################################################"
            )
            if f.startswith(prefixes):
                in_file = "%s/%s" % (dirpath, f)
                logger.log(35, "Input from: %s" % (in_file))
                if not os.access(in_file, os.R_OK):
                    logger.error("Could not read file '%s', no permission!" %
                                 in_file)
                elif is_fixed:
                    tools.process_file_fix(params, in_file)
                elif cores > 1:
                    # collect files; they are processed in batches below
                    multilst.append(
                        [params, in_file, var, reslist, year, firstlast])
                    seaslst.append([params, year])
                else:
                    reslist = tools.process_file(params, in_file, var,
                                                 reslist, year, firstlast)
                    if seasonal:
                        tools.proc_seasonal(params, year)
            else:
                logger.warning(
                    "File %s does not match the file name conventions for this variable. File not processed..."
                    % f)

            # process as many files simultaneously as there are cores specified
            if multilst and len(multilst) >= cores:
                log.info("Processing years %s to %s simultaneously" %
                         (seaslst[0][1], seaslst[-1][1]))
                results = _run_file_batch(multilst, seaslst, seasonal, cores,
                                          cores)
                currfile += len(multilst)
                # start new batch with the resolution list returned by the workers
                multilst = []
                seaslst = []
                reslist = results[0]

            # update currfile
            if cores <= 1:
                currfile += 1

            # print progress bar
            tools.print_progress(currfile, nfiles)

    # process remaining files
    if multilst:
        log.info("Processing years %s to %s simultaneously" %
                 (seaslst[0][1], seaslst[-1][1]))
        _run_file_batch(multilst, seaslst, seasonal, len(multilst), cores)
        currfile += len(multilst)
        tools.print_progress(currfile, nfiles)

    log.info("Variable '%s' finished!" % (var))

    return nfiles, currfile
def init(vartable):
    '''
    Fill the module-level settings from the configuration and load the variables table.

    Parameters
    ----------

    vartable : str
        File name of the semicolon-separated variables table, relative to DirConfig
    '''
    global BasePath, DataPath
    global DirIn, DirOut, DirConfig, DirWork, DirLog, DirDerotated
    global global_attr_list, global_attr_list_2ndNest, global_attr_file
    global varlist_reject, var_skip_list, var_list_fixed, varlist
    global FMT, vertices_file, coordinates_file
    global Global_attributes, Global_attributes_2ndNest, netCDF_attributes
    global use_version, param, dpm

    def _setting(key, **kwargs):
        # value from the 'settings' section of the configuration
        return config.get_config_value('settings', key, **kwargs)

    def _csv_setting(key, **kwargs):
        # comma-separated 'settings' value as list
        return _setting(key, **kwargs).split(',')

    # base paths, stored without trailing slash
    BasePath = _setting('BasePath', exitprog=False)
    if BasePath[-1:] == "/":
        BasePath = BasePath[:-1]
    DataPath = _setting('DataPath', exitprog=False)
    if DataPath[-1:] == "/":
        DataPath = DataPath[:-1]

    # directories below the data path or the base path
    DirIn = "%s/%s" % (DataPath, _setting('DirIn'))
    DirOut = "%s/%s" % (DataPath, _setting('DirOut'))
    DirConfig = "%s/%s" % (BasePath, _setting('DirConfig'))
    DirWork = "%s/%s" % (DataPath, _setting('DirWork'))
    DirLog = "%s/%s" % (BasePath, _setting('DirLog'))
    DirDerotated = "%s/%s" % (DataPath, _setting('DirDerotated'))

    # attribute lists (the second one holds the 2nd nest information)
    global_attr_list = _csv_setting('global_attr_list')
    global_attr_list_2ndNest = _csv_setting('global_attr_list_2ndNest')
    global_attr_file = _csv_setting('global_attr_file', exitprog=False)

    # variable lists
    varlist_reject = _csv_setting('varlist_reject')
    var_skip_list = _csv_setting('var_skip_list', exitprog=False)
    var_list_fixed = config.get_sim_value('var_list_fixed',
                                          exitprog=False).split(',')
    varlist = _csv_setting('varlist', exitprog=False)

    FMT = '%Y-%m-%d %H:%M:%S'

    vertices_file = "%s/%s" % (DirConfig,
                               config.get_sim_value('vertices_file',
                                                    exitprog=False))
    coordinates_file = "%s/%s" % (DirConfig,
                                  config.get_sim_value('coordinates_file'))

    # dictionaries for global attributes (also of 2nd nest) and for
    # additional netcdf attributes
    Global_attributes = OrderedDict()
    Global_attributes_2ndNest = OrderedDict()
    netCDF_attributes = OrderedDict()

    use_version = ''

    # variables table: one entry per CORDEX name and one per RCM name
    param = {}
    with open(DirConfig + "/" + vartable, 'rt') as csvfile:
        rows = csv.reader(csvfile, delimiter=';')
        next(rows, None)  # skip header
        for row in rows:
            var = row[config.get_config_value('index', 'INDEX_VAR')]
            if row[config.get_config_value(
                    'index', 'INDEX_RCM_NAME_ORG')] == '' or var == '':
                continue
            param[var] = row
            if var != "prhmax":
                # for prhmax the RCM name in the table is equal to pr
                param[row[config.get_config_value('index',
                                                  'INDEX_RCM_NAME')]] = row

    # days per month for each calendar, index 0 is unused
    common_year = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    leap_year = [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    days_per_month = {}
    for calendar in ('noleap', '365_day', 'standard', 'gregorian',
                     'proleptic_gregorian'):
        days_per_month[calendar] = list(common_year)
    for calendar in ('all_leap', '366_day'):
        days_per_month[calendar] = list(leap_year)
    days_per_month['360_day'] = [0] + [30] * 12
    dpm = days_per_month