def periodOfEmFile(filename,realm,freq):
    """
    Return the period covered by a file handled by EM, based on filename
    rules for EM.

    - realms 'A' and 'L' : only monthly data is handled (``freq`` is 'mon'
      or ''); the four digits found just before the '.nc' suffix are read
      as a year, and the period returned is January to December of that year
    - realms 'O' and 'I' : monthly ('monthly', 'mon', '') and daily (any
      ``freq`` starting with 'da') data are handled; the filename must look
      like ``..._1m_YYYYMMDD_YYYYMMDD_....nc`` (resp. ``_1d_``)

    Returns None if the filename does not fit the naming rule for ``freq``.
    Raises Climaf_Data_Error for an unsupported frequency or realm.
    """
    if realm in ('A', 'L'):
        if freq not in ('mon', ''):
            raise Climaf_Data_Error("can yet handle only monthly frequency for realms A and L - TBD")
        # When the pattern does not match, re.sub returns the filename
        # unchanged, which is not made of digits only
        year = re.sub(r'^.*([0-9]{4}).nc', r'\1', filename)
        if not year.isdigit():
            return None
        return init_period("%s01-%s12" % (year, year))

    if realm in ('O', 'I'):
        if freq in ('monthly', 'mon', ''):
            altfreq = 'm'
        elif freq[0:2] == 'da':
            altfreq = 'd'
        else:
            raise Climaf_Data_Error("Can yet handle only monthly and daily frequency for realms O and I - TBD")
        patt = r'^.*_1' + altfreq + r'_([0-9]{8})_*([0-9]{8})_.*nc'
        first_date = re.sub(patt, r'\1', filename)
        last_date = re.sub(patt, r'\2', filename)
        # An unchanged string means that the pattern did not match
        if last_date == filename or first_date == filename:
            return None
        return init_period("%s-%s" % (first_date, last_date))

    raise Climaf_Data_Error("unexpected realm "+realm)
def periodOfEmFile(filename,realm,freq):
    """
    Return the period covered by a file handled by EM, based on filename
    rules for EM.

    Atmosphere/land files (realms 'A', 'L') are only handled at monthly
    frequency ('mon' or ''): the year is the group of four digits located
    just before '.nc', and the period spans months 01 to 12 of that year.
    Ocean/ice files (realms 'O', 'I') are handled at monthly ('monthly',
    'mon', '') and daily (``freq`` starting with 'da') frequency: both
    8-digit dates following ``_1m_`` (resp. ``_1d_``) delimit the period.

    Returns None if the filename does not fit the naming rule for ``freq``.
    Raises classes.Climaf_Error for an unsupported frequency or realm.
    """
    atmos_or_land = (realm == 'A' or realm == 'L')
    ocean_or_ice = (realm == 'O' or realm == 'I')

    if atmos_or_land:
        monthly = (freq == 'mon' or freq == '')
        if not monthly:
            raise classes.Climaf_Error("can yet handle only monthly frequency for realms A and L - TBD")
        year = re.sub(r'^.*([0-9]{4}).nc', r'\1', filename)
        # re.sub leaves the filename untouched when there is no match,
        # hence the digits test
        if year.isdigit():
            speriod = "%s01-%s12" % (year, year)
            return init_period(speriod)
        return None

    if ocean_or_ice:
        if freq == 'monthly' or freq == 'mon' or freq == '':
            altfreq = 'm'
        elif freq[0:2] == 'da':
            altfreq = 'd'
        else:
            raise classes.Climaf_Error("Can yet handle only monthly and daily frequency for realms O and I - TBD")
        patt = r'^.*_1' + altfreq + r'_([0-9]{8})_*([0-9]{8}).*nc'
        beg, end = [re.sub(patt, group, filename) for group in (r'\1', r'\2')]
        if filename in (end, beg):
            # No substitution occurred : the file does not fit the frequency
            return None
        return init_period("%s-%s" % (beg, end))

    raise classes.Climaf_Error("unexpected realm "+realm)
def selectCmip5DrsFiles(urls, **kwargs) :
    """
    Return the list of files matching ``kwargs`` in a CMIP5 DRS-like tree
    located under each root directory of ``urls``.

    Example for path :
    CMIP5/output1/CNRM-CERFACS/CNRM-CM5/1pctCO2/mon/atmos/
    Amon/r1i1p1/v20110701/clivi/clivi_Amon_CNRM-CM5_1pctCO2_r1i1p1_185001-189912.nc

    Second path segment can be any string (allows for : output, output1,
    merge...), but if 'merge' exists, it is used alone.

    If version is 'last', tries to provide version from directory 'last'
    if available, otherwise the last one in sorted order.

    Required kwargs : project, model, simulation, frequency, variable,
    realm, table, period, experiment, version. Except for frequency 'fx',
    a file is selected only if the period found at the end of its name
    (YYYYMM-YYYYMM) intersects ``period``.
    """
    project=kwargs['project']
    model=kwargs['model']
    simulation=kwargs['simulation']
    frequency=kwargs['frequency']
    variable=kwargs['variable']
    realm=kwargs['realm']
    table=kwargs['table']
    period=kwargs['period']
    experiment=kwargs['experiment']
    version=kwargs['version']
    #
    # Translate CliMAF frequency names to DRS ones
    freqd={'monthly':'mon'}.get(frequency,frequency)
    period_regex=r'^.*([0-9]{4}[0-9]{2}-[0-9]{4}[0-9]{2}).nc$'
    selected=[]
    # TBD : analyze ambiguity of variable among realms+tables
    for root in urls :
        product_dir=root+"/"+project+"/merge"
        if not os.path.exists(product_dir) :
            product_dir=root+"/"+project+"/*"
        simulation_glob="/".join([product_dir,"*",model,experiment,freqd,
                                  realm,table,simulation])
        # One directory per matching simulation, each holding version dirs
        for simulation_dir in glob.glob(simulation_glob) :
            available=sorted(os.listdir(simulation_dir))
            chosen=version
            if version == "last" and available :
                # Assume that order provided by sorting is OK
                chosen="last" if "last" in available else available[-1]
            for path in glob.glob(simulation_dir+"/"+chosen+"/"+variable+"/*.nc") :
                if freqd == 'fx' :
                    clogger.debug("adding fixed field "+ path)
                    selected.append(path)
                    continue
                fileperiod=init_period(re.sub(period_regex,r'\1',path))
                if fileperiod and period.intersects(fileperiod) :
                    selected.append(path)
    return selected
def selectCmip5DrsFiles(urls, **kwargs) :
    """
    Return the list of files matching ``kwargs`` in a CMIP5 DRS-like tree
    located under each root directory of ``urls``.

    Example for path :
    CMIP5/[output1/]CNRM-CERFACS/CNRM-CM5/1pctCO2/mon/atmos/
    Amon/r1i1p1/v20110701/clivi/clivi_Amon_CNRM-CM5_1pctCO2_r1i1p1_185001-189912.nc

    Second path segment can be any string (allows for : output, output1,
    merge...), but if 'merge' exists, it is used alone. This segment can
    also be empty.

    If version is 'last', tries to provide version from directory 'last'
    if available, otherwise the last one in sorted order.

    Required kwargs : project, model, simulation, frequency, variable,
    realm, table, period, experiment, version.

    For frequency 'fx' all files are returned. For frequencies 'day', 'mon'
    and 'yr', a file is returned only if the period found at the end of its
    name intersects ``period``. For any other frequency the period cannot
    be derived from the filename : an error is logged and the file is
    skipped.
    """
    project=kwargs['project']
    model=kwargs['model']
    simulation=kwargs['simulation']
    frequency=kwargs['frequency']
    variable=kwargs['variable']
    realm=kwargs['realm']
    table=kwargs['table']
    period=kwargs['period']
    experiment=kwargs['experiment']
    version=kwargs['version']
    #
    rep=[]
    # Translate CliMAF frequency names to DRS ones
    freqd={'monthly':'mon'}.get(frequency,frequency)
    # Regexp capturing the period at the end of a filename, per DRS frequency
    period_regexps={
        'day' : r'^.*([0-9]{8}-[0-9]{8}).nc$',
        'mon' : r'^.*([0-9]{6}-[0-9]{6}).nc$',
        'yr'  : r'^.*([0-9]{4}-[0-9]{4}).nc$',
    }
    # TBD : analyze ambiguity of variable among realms+tables
    for l in urls :
        # Try the possible values for second path segment, in priority
        # order, and keep the first one which holds data for the variable
        # (the last one is kept if none does)
        for p in ['merge/','output/','output?/','main/',''] :
            pattern1=l+"/"+project+"/"+p+"*/"+model # one * for modelling center
            patternv=pattern1+"/"+experiment+"/"+freqd+"/"+realm+"/"+table+"/"+simulation
            if glob.glob(patternv+"/*/"+variable) :
                break
        # Get version directories list
        ldirs=glob.glob(patternv)
        clogger.debug("Globbing with "+patternv+ " gives:" +repr(ldirs))
        for repert in ldirs :
            lversions=sorted(os.listdir(repert))
            cversion=version # initial guess of the version to use
            if version == "last" and lversions :
                # Assume that order provided by sorting is OK
                cversion="last" if "last" in lversions else lversions[-1]
            lfiles=glob.glob(repert+"/"+cversion+"/"+variable+"/*.nc")
            for f in lfiles :
                if freqd == 'fx' :
                    clogger.debug("adding fixed field "+ f)
                    rep.append(f)
                    continue
                regex=period_regexps.get(freqd)
                if regex is None :
                    # Formerly, this case led to using an unbound variable
                    clogger.error("Cannot derive period of file %s : "
                                  "frequency %s is not handled"%(f,freqd))
                    continue
                fileperiod=init_period(re.sub(regex,r'\1',f))
                if fileperiod and period.intersects(fileperiod) :
                    rep.append(f)
    return rep
def selectGenericFiles(urls, return_wildcards=None,merge_periods_on=None,**kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned
    by this function is composed of files which :

    - match the patterns in ``url`` once these patterns are instantiated by
      the values in kwargs, and

     - contain the ``variable`` provided in kwargs

     - match the `period`` provided in kwargs

    In the pattern strings, no keyword is mandatory. However, for remote files,
    filename pattern must include ${varname}, which is instanciated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this is
    for the sake of efficiency (please complain if inadequate)

    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*${PERIOD}*.nc)']
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and
      filenames do include the variable name, as this speed up the search

    - ${PERIOD} : use it for indicating the period covered by each file, if this
      is applicable in the file naming; this period can appear in filenames as
      YYYY, YYYYMM, YYYYMMDD, YYYYMMDDHHMM, either once only, or twice with
      separator ='-' or '_'

    - wildcards '?' and '*' for matching respectively one and any number of characters

    Other arguments :

    - ``return_wildcards`` : if not None, a dict which is filled with the
      values discovered for those facets which are provided as wildcards
      (for facet 'period', a dict of sorted periods lists)

    - ``merge_periods_on`` : if not None, name of the facet whose values
      are used as keys for grouping the periods discovered

    Only the first url which provides some data is used. Note that
    ``kwargs['variable']`` is replaced by ``kwargs['filenameVar']`` if
    globbing is unsuccessful with the former.

    """
    def store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards, merge_periods_on=None,
                                    fperiod=None,periods=None,periods_dict=None):
        """
        Record in ``wildcards`` the values that file ``f`` shows for those
        facets which are wildcards in ``kwargs``, and, if both ``fperiod``
        and ``periods`` are provided, record ``fperiod`` in ``periods``
        and in ``periods_dict`` (under the key chosen by ``merge_periods_on``)
        """
        if fperiod is not None and periods is not None :
            clogger.debug('Adding period %s'%fperiod)
            periods.append(fperiod)
        #
        for kw in kwargs :
            for oc in re.finditer(facets_regexp,f) :
                try :
                    facet_value=oc.group(kw)
                except IndexError :
                    # The regexp has no group named after this facet
                    continue
                if not (type(kwargs[kw]) is str and ("*" in kwargs[kw] or "?" in kwargs[kw] )) :
                    continue
                if facet_value is not None :
                    if kw not in wildcards : wildcards[kw]=set()
                    wildcards[kw].add(facet_value)
                    clogger.debug("Discover %s=%s for file=%s"%(kw,facet_value,f))
                else :
                    clogger.debug("Logic issue for kw=%s and file=%s"%(kw,f))
                #
                if fperiod is not None and periods is not None :
                    if merge_periods_on is None : key=None
                    elif kw == merge_periods_on : key=facet_value
                    else : continue
                    if key not in periods_dict: periods_dict[key]=set()
                    periods_dict[key].add(fperiod)

    def keep_dated_files(files) :
        """ Keep only files whose name shows a date pattern (glob's '*'
        is too inclusive) """
        return [ f for f in files
                 if any(re.search(date_regexp_patt[k],f) for k in date_keywords) ]

    rep=[]
    # Must exist even if there is no url to process
    wildcards=dict()
    #
    periods=None # a list of periods available
    periods_dict=dict()
    #
    period=kwargs['period']
    if period == "*" :
        periods=[] # List of all periods
    elif type(period) is str : period=init_period(period)
    #
    variable=kwargs['variable']
    altvar=kwargs.get('filenameVar',variable)
    #
    # dicts of date patterns, for globbing and for regexp
    #
    digit="[0-9]"
    date_glob_patt={ "${PERIOD}" : "*" }
    # an ordered list of dates keywords
    date_keywords=sorted(date_glob_patt,reverse=True)
    #
    annee="%s{4}"%digit
    mois="(01|02|03|04|05|06|07|08|09|10|11|12)"
    jour="([0-3][0-9])"
    heure="(00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23)"
    minutes="[0-5][0-9]"
    date="%s(%s(%s(%s(%s)?)?)?)?"%(annee,mois,jour,heure,minutes)
    rperiod="(?P<period>(?P<start>%s)([_-](?P<end>%s))?)"%(date,date)
    date_regexp_patt={ "${PERIOD}" : rperiod }
    # an ordered list of dates regexp keywords
    date_regexp_keywords=sorted(date_regexp_patt,reverse=True)
    #
    for l in urls :
        # Instantiate keywords in pattern with attributes values
        remote_prefix=""
        if re.findall(".*:.*",l) :
            remote_prefix=':'.join(l.split(":")[0:-1])+':'
        basename=l.split(":")[-1] # This discard the remote_prefix if any
        basename=basename.replace("//","/")
        my_template=Template(basename)
        template=my_template.safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2=template
        for k in date_keywords : temp2=temp2.replace(k,date_glob_patt[k])
        # Do globbing with plain varname
        if remote_prefix :
            lfiles=sorted(glob_remote_data(remote_prefix, temp2))
            clogger.debug("Remote globbing %d files for varname on %s : "%\
                          (len(lfiles),remote_prefix+temp2))
        else: # local data
            lfiles=sorted(glob.glob(temp2))
            clogger.debug("Before regexp filtering : Globbing %d files for varname on %s : "%(len(lfiles),temp2))
            lfiles=keep_dated_files(lfiles)
            clogger.debug("Globbing %d files for varname on %s : "%(len(lfiles),temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles)==0 and "filenameVar" in kwargs and kwargs['filenameVar'] :
            # Change value of facet 'variable'
            kwargs['variable']=kwargs['filenameVar']
            template=my_template.safe_substitute(**kwargs)
            temp2=template
            for k in date_keywords : temp2=temp2.replace(k,date_glob_patt[k])
            #
            # Do globbing with fileVarname
            if remote_prefix :
                lfiles=sorted(glob_remote_data(remote_prefix, temp2))
                clogger.debug("Remote globbing %d files for filenamevar on %s: "%\
                              (len(lfiles),remote_prefix+temp2))
            else: # local data
                lfiles=keep_dated_files(sorted(glob.glob(temp2)))
                clogger.debug("Globbing %d files for filenamevar on %s: "%(len(lfiles),temp2))
        #
        # For discovering values for those facets which are a wildcard,
        # construct a regexp with a group name for all facets (but period)
        alt_basename=basename.replace("?",".").replace("*",".*")
        alt_kwargs=kwargs.copy()
        for kw in kwargs :
            if type(kwargs[kw]) is str : # This excludes period attribute, which has a type
                alt_kwargs[kw]=kwargs[kw].replace("?",".").replace("*",".*")
                alt_basename=alt_basename.replace(r"${%s}"%kw,r"(?P<%s>%s)"%(kw,alt_kwargs[kw]),1)
        facets_regexp=Template(alt_basename).safe_substitute(**alt_kwargs)
        for k in date_regexp_keywords :
            # Only first occurrence can bear the named groups
            facets_regexp=facets_regexp.replace(k,date_regexp_patt[k],1)
            facets_regexp=facets_regexp.replace(k,".*")
        wildcards=dict()
        #
        # Construct regexp for extracting dates from filename
        date_regexp=None
        hasEnd=False
        template_toreg=template.replace("*",".*").replace("?",r".").replace("+",r"\+")
        for key in date_regexp_keywords :
            if template_toreg.find(key) >= 0 :
                date_regexp=template_toreg.replace(key,date_regexp_patt[key],1)
                # A second occurrence of the keyword stands for end date
                hasEnd=False
                if date_regexp.find(key) >= 0 :
                    hasEnd=True
                    date_regexp=date_regexp.replace(key,date_regexp_patt[key],1)
                break
        #
        for f in lfiles :
            #
            # Extract file time period
            #
            fperiod=None
            if date_regexp :
                if "P<period>" in date_regexp :
                    tperiod=re.sub(date_regexp,r'\g<period>',f)
                    if tperiod==f :
                        raise classes.Climaf_Error("Cannot find a period in %s with regexp %s"%(f,date_regexp))
                    fperiod=init_period(tperiod)
                else:
                    date_regexp0=date_regexp
                    start=re.sub(date_regexp0,r'\1',f)
                    if start==f:
                        raise Climaf_Data_Error("Start period not found in %s using regexp %s"%(f,date_regexp0))
                    if hasEnd :
                        end=re.sub(date_regexp0,r'\2',f)
                        fperiod=init_period("%s-%s"%(start,end))
                    else :
                        fperiod=init_period(start)
            #
            # No date in pattern : cannot filter file against required period
            else :
                if ( 'frequency' in kwargs and ((kwargs['frequency']=="fx") or \
                    kwargs['frequency']=="seasonnal" or kwargs['frequency']=="annual_cycle" )) :
                    # local data
                    if not remote_prefix and \
                       ( (basename.find("${variable}")>=0) or variable=='*' or \
                         fileHasVar(f,variable) or (variable != altvar and fileHasVar(f,altvar)) ) :
                        clogger.debug("adding fixed field :"+f)
                        store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on)
                        rep.append(f)
                    # remote data
                    elif remote_prefix :
                        if (basename.find("${variable}")>=0) or variable=='*' or \
                           (variable != altvar and (f.find(altvar)>=0) ):
                            clogger.debug("adding fixed field :"+remote_prefix+f)
                            store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on)
                            rep.append(remote_prefix+f)
                        else:
                            # Parentheses matter : % must apply to the whole message
                            raise classes.Climaf_Error(
                                ("For remote files, filename pattern (%s) should include ${varname} "+\
                                 "(which is instanciated by variable name or filenameVar)")%f)
                else :
                    clogger.info("Cannot yet filter files re. time using only file content.")
                    store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on)
                    rep.append(f)
            #
            # If file period matches requested period, check similarly for variable
            #
            if (fperiod and ( periods is not None or period.intersects(fperiod) )) \
               or not date_regexp :
                #
                clogger.debug('Period is OK - Considering variable filtering on %s and %s for %s'%(variable,altvar,f))
                # Filter against variable
                if (l.find("${variable}")>=0):
                    clogger.debug('appending %s based on variable in filename'%f)
                    store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on,
                                                fperiod,periods,periods_dict)
                    rep.append(remote_prefix+f)
                    continue
                if (f not in rep):
                    # local data
                    if not remote_prefix and \
                       (variable=='*' or "," in variable or fileHasVar(f,variable) or \
                        (altvar != variable and fileHasVar(f,altvar))) :
                        # Should check time period in the file if not date_regexp
                        clogger.debug('appending %s based on multi-var or var exists in file '%f)
                        store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on,
                                                    fperiod,periods,periods_dict)
                        rep.append(f)
                        continue
                    # remote data
                    elif remote_prefix :
                        if variable=='*' or "," in variable or \
                           (variable != altvar and (f.find(altvar)>=0) ):
                            # Should check time period in the file if not date_regexp
                            clogger.debug('appending %s based on multi-var or altvar '%(remote_prefix+f))
                            store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards, merge_periods_on,
                                                        fperiod,periods,periods_dict)
                            rep.append(remote_prefix+f)
                            continue
                        else:
                            mess="For remote files, filename pattern (%s) should include"%(remote_prefix+f)
                            mess+=" ${varname} (which is instanciated by variable name or filenameVar)"
                            raise classes.Climaf_Error(mess)
            else:
                if not fperiod :
                    clogger.debug('not appending %s because period is None '%f)
                elif not period.intersects(fperiod) :
                    clogger.debug('not appending %s because period doesn t intersect %s'%(f,period))
                else:
                    clogger.debug('not appending %s for some other reason'%f)

        # Break on first url with any matching data
        if len(rep)>0 :
            clogger.debug('url %s does match for '%l + repr(kwargs))
            break

    # For wildcard facets, discover facet values + checks
    for facet in wildcards:
        s=wildcards[facet]
        if return_wildcards is not None :
            if facet=="period" :
                for val in periods_dict :
                    periods_dict[val]=sort_periods_list(list(periods_dict[val]))
                clogger.info("Attribute period='*' has values %s"%(periods_dict))
                return_wildcards["period"]=periods_dict
            else:
                if len(s) == 1 :
                    s=s.pop()
                    clogger.info("Attribute %s='%s' has matching value '%s'"%(facet,kwargs[facet],s))
                    return_wildcards[facet]=s
                else:
                    # Use a dedicated name : 'rep' is the files list to return
                    facet_values=sorted(s)
                    return_wildcards[facet]=facet_values
                    message="Attribute %s='%s' has multiple values : %s"%(facet,kwargs[facet],list(s))
                    if return_wildcards : clogger.info(message)
                    else: clogger.error(message)
        else:
            clogger.debug("return_wildcards is None")
    return rep
def ceval_script (scriptCall,deep,recurse_list=None):
    """ Actually applies a CliMAF-declared script on a script_call object

    Prepare operands as files and build command from operands and parameters list
    Assumes that scripts are described in dictionary 'scripts'  by templates as
    documented in operators.cscript

    Arguments :

    - ``scriptCall`` : the script call object; its operator, operands,
      flags, parameters, outputs, variable and crs attributes are used
    - ``deep`` and ``recurse_list`` : passed as is to ceval() when
      evaluating operands; ``recurse_list`` defaults to a new empty list

    Returns a CLiMAF cache data filename, or None if the script has no output

    Raises Climaf_Driver_Error if an operand has no value, if an input file
    does not exist, if an ensemble is provided to an input which does not
    accept it, if the script fails, or if some output cannot be registered.

    Script standard output and error are written to file ./last.out
    """
    if recurse_list is None :
        # Do not share a single default list among successive calls
        recurse_list=[]
    script=operators.scripts[scriptCall.operator]
    template=Template(script.command)

    # Evaluate input data
    dict_invalues=dict()
    for op in scriptCall.operands :
        inValue=ceval(op,userflags=scriptCall.flags,format='file',deep=deep,
                      recurse_list=recurse_list)
        if inValue is None or inValue == "" :
            raise Climaf_Driver_Error("When evaluating %s : value for %s is None"\
                                      %(scriptCall.script,repr(op)))
        dict_invalues[op]=inValue
    #
    # Replace input data placeholders with filenames
    subdict=dict()
    opscrs=""
    if 0 in script.inputs :
        # First operand is handled apart : its keywords bear no index
        label,multiple,serie=script.inputs[0]
        op=scriptCall.operands[0]
        infile=dict_invalues[op]
        if not all(map(os.path.exists,infile.split(" "))) :
            raise Climaf_Driver_Error("Internal error : some input file does not exist among %s:"%(infile))
        subdict[ label ]=infile
        subdict["var"]=varOf(op)
        if isinstance(op,classes.cdataset) and op.alias and scriptCall.flags.canAlias:
            filevar,scale,offset,units,filenameVar,missing=op.alias
            if ((varOf(op) != filevar) or scale != 1.0 or offset != 0.) :
                subdict["alias"]="%s,%s,%.4g,%.4g"%(varOf(op),filevar,scale,offset)
                subdict["var"]=filevar
            if units : subdict["units"]=units
            if scriptCall.flags.canMissing and missing :
                subdict["missing"]=missing
        if isinstance(op,classes.cens) :
            if not multiple :
                raise Climaf_Driver_Error(
                    "Script %s 's input #%s cannot accept ensemble %s"\
                        %(scriptCall.script,0,repr(op)))
            subdict["labels"]="$".join(op.labels)
        per=timePeriod(op)
        if not per.fx and str(per) != "" and scriptCall.flags.canSelectTime:
            subdict["period"]=str(per)
            subdict["period_iso"]=per.iso()
        if scriptCall.flags.canSelectDomain :
            subdict["domain"]=domainOf(op)
    i=0
    for op in scriptCall.operands :
        opscrs += op.crs+" - "
        infile=dict_invalues[op]
        if not all(map(os.path.exists,infile.split(" "))) :
            raise Climaf_Driver_Error("Internal error : some input file does not exist among %s:"%(infile))
        i+=1
        if ( i> 1 or 1 in script.inputs) :
            label,multiple,serie=script.inputs[i]
            subdict[ label ]=infile
            # Provide the name of the variable in input file if script allows for
            subdict["var_%d"%i]=varOf(op)
            if isinstance(op,classes.cdataset) and op.alias :
                filevar,scale,offset,units,filenameVar,missing =op.alias
                if (varOf(op) != filevar) or (scale != 1.0) or (offset != 0.) :
                    subdict["alias_%d"%i]="%s %s %f %f"%(varOf(op),filevar,scale,offset)
                    subdict["var_%d"%i]=filevar
                if units : subdict["units_%d"%i]=units
                if missing : subdict["missing_%d"%i]=missing
            # Provide period selection if script allows for
            per=timePeriod(op)
            if not per.fx and per != "":
                subdict["period_%d"%i]=str(per)
                subdict["period_iso_%d"%i]=per.iso()
            subdict["domain_%d"%i]=domainOf(op)
    clogger.debug("subdict for operands is "+repr(subdict))
    # substitution is deferred after scriptcall parameters evaluation, which may
    # redefine e.g period
    #
    # Provide one cache filename for each output and instantiates the command accordingly
    if script.outputFormat is not None :
        # Compute a filename for each output
        # Un-named main output
        main_output_filename=cache.generateUniqueFileName(scriptCall.crs,
                                                         format=script.outputFormat)
        subdict["out"]=main_output_filename
        subdict["out_"+scriptCall.variable]=main_output_filename
        # Named outputs
        for output in scriptCall.outputs:
            subdict["out_"+output]=cache.generateUniqueFileName(scriptCall.crs+"."+output,\
                                                         format=script.outputFormat)
    # Account for script call parameters
    for p in scriptCall.parameters :
        subdict[p]=scriptCall.parameters[p]
        if p=="period" :
            subdict["period_iso"]=init_period(scriptCall.parameters[p]).iso()
    subdict["crs"]=opscrs.replace("'","")
    #
    # Combine CRS and possibly member_label to provide/complement title
    if 'title' not in subdict :
        if 'member_label' in subdict :
            subdict["title"]=subdict['member_label']
        else:
            subdict["title"]=subdict["crs"]
    else:
        if 'member_label' in subdict :
            subdict["title"]=subdict["title"]+" "+subdict['member_label']
            subdict.pop('member_label')
    #
    # Substitute all args
    template=template.safe_substitute(subdict)
    #
    # Allowing for some formal parameters to be missing in the actual call:
    #
    # Discard remaining substrings looking like :
    #  some_word='"${some_keyword}"'     , or:
    #  '"${some_keyword}"'
    template=re.sub(r'(\w*=)?(\'\")?\$\{\w*\}(\"\')?',r"",template)
    #
    # Discard remaining substrings looking like :
    #  some_word=${some_keyword}          , or
    #  ${some_keyword}
    template=re.sub(r"(\w*=)?\$\{\w*\}",r"",template)
    #
    # Launch script using command, and check termination
    tim1=time.time()
    clogger.info("Launching command:"+template)
    #
    command=subprocess.Popen(template, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
    # Read the whole output before waiting for termination : waiting first
    # may block forever if the script fills the pipe buffer
    command_std=command.communicate()[0]
    #
    with open('last.out', 'w') as logfile :
        logfile.write("\n\nstdout and stderr of script call :\n\t "+template+"\n\n")
        logfile.write(command_std)
    if command.returncode == 0 :
        if script.outputFormat is None :
            clogger.debug("script %s has no output"%script.name)
            return None
        # Tagging output files with their CliMAF Reference Syntax definition
        # Un-named main output
        ok = cache.register(main_output_filename,scriptCall.crs)
        # Named outputs
        for output in scriptCall.outputs:
            ok = ok and cache.register(subdict["out_"+output],\
                                       scriptCall.crs+"."+output)
        if not ok :
            raise Climaf_Driver_Error("Some output missing when executing "
                                      ": %s. \n See last.out"%template)
        duration=time.time() - tim1
        print("Done in %.1f s with script computation for %s "%\
                  (duration,repr(scriptCall)),file=sys.stderr)
        clogger.debug("Done in %.1f s with script computation for "
                      "%s (command was :%s )"%\
                          (duration,repr(scriptCall),template))
        return main_output_filename
    else:
        clogger.debug("Full script output:\n"+command_std)
        comm2=subprocess.Popen(["tail", "-n", "10", "last.out"], stdout=subprocess.PIPE)
        clogger.error("Last lines of script output:\n"+comm2.stdout.read())
        raise Climaf_Driver_Error("Script failure for : %s. More details either in file "
                                  "./last.out or by re-runing with clog(\"debug\")" %template)
def selectGenericFiles(urls, **kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned
    by this function is composed of files which :

    - match the patterns in ``url`` once these patterns are instantiated by
      the values in kwargs, and

     - contain the ``variable`` provided in kwargs

     - match the `period`` provided in kwargs

    In the pattern strings, no keyword is mandatory. However, for remote files,
    filename pattern must include ${varname}, which is instanciated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this is
    for the sake of efficiency (please complain if inadequate)

    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc)']
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and
      filenames do include the variable name, as this speed up the search

    - YYYY, YYYYMM, YYYYMMDD : use it for indicating the start date of
      the period covered by each file, if this is applicable in the
      file naming; use a second time for end date, if applicable
      (otherwise the assumption is that the whole year -resp. month or
      day- is included in the file

    - wildcards '?' and '*' for matching respectively one and any number of characters

    Only the first url which provides some data is used, and each file is
    listed once. Note that ``kwargs['variable']`` is replaced by
    ``kwargs['filenameVar']`` if globbing is unsuccessful with the former.

    """
    rep = []

    def add_file(path):
        """ Add path to the result, unless already there (a file can be
        selected both by the period step and by the variable step) """
        if path not in rep:
            rep.append(path)

    period = kwargs['period']
    if type(period) is str:
        period = init_period(period)
    variable = kwargs['variable']
    altvar = kwargs.get('filenameVar', variable)
    # a dict and an ordered list of date globbing patterns (longest
    # keyword first, because short keywords are prefixes of longer ones)
    dt = dict(YYYY="????",
              YYYYMM="??????",
              YYYYMMDD="????????",
              YYYYMMDDHH="??????????")
    lkeys = sorted(dt, reverse=True)
    # a dict and an ordered list for matching dates
    dr = dict(YYYY="([0-9]{4})",
              YYYYMM="([0-9]{6})",
              YYYYMMDD="([0-9]{8})",
              YYYYMMDDHH="([0-9]{10})")
    rkeys = sorted(dr, reverse=True)
    #
    for l in urls:
        # Instantiate keywords in pattern with attributes values
        if re.findall(".*:.*", l):  # remote data
            remote_prefix = ':'.join(l.split(":")[0:-1]) + ':'
            pattern = l.split(":")[-1]
        else:  # local data
            remote_prefix = ""
            pattern = l
        template = Template(pattern).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in lkeys:
            temp2 = temp2.replace(k, dt[k])
        if remote_prefix:
            lfiles = sorted(glob_remote_data(remote_prefix, temp2))
            clogger.debug("Remote globbing %d files for varname on %s : " %
                          (len(lfiles), remote_prefix + temp2))
        else:  # local data
            lfiles = sorted(glob.glob(temp2))
            clogger.debug("Globbing %d files for varname on %s : " %
                          (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles) == 0 and "filenameVar" in kwargs and kwargs['filenameVar']:
            # Change value of facet 'variable'
            kwargs['variable'] = kwargs['filenameVar']
            template = Template(pattern).safe_substitute(**kwargs)
            temp2 = template
            for k in lkeys:
                temp2 = temp2.replace(k, dt[k])
            #
            if remote_prefix:
                lfiles = sorted(glob_remote_data(remote_prefix, temp2))
                clogger.debug("Globbing %d files for filenamevar on %s: " %
                              (len(lfiles), remote_prefix + temp2))
            else:  # local data
                lfiles = sorted(glob.glob(temp2))
                clogger.debug("Globbing %d files for filenamevar on %s: " %
                              (len(lfiles), temp2))
        #
        # Construct regexp for extracting dates from filename
        regexp = None
        hasEnd = False
        for key in rkeys:
            if template.find(key) >= 0:
                regexp = template.replace(key, dr[key], 1)
                # A second occurrence of the keyword stands for end date
                hasEnd = False
                if regexp.find(key) >= 0:
                    hasEnd = True
                    regexp = regexp.replace(key, dr[key], 1)
                break
        #
        for f in lfiles:
            #
            # Analyze file time period
            fperiod = None
            if regexp:
                regexp0 = regexp.replace("*", ".*").replace("?", r".")
                start = re.sub(regexp0, r'\1', f)
                if start == f:
                    raise Climaf_Data_Error("Start period not found")
                if hasEnd:
                    end = re.sub(regexp0, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
            #
            # No date in pattern : cannot filter file against required period
            else:
                if ( 'frequency' in kwargs and ((kwargs['frequency']=="fx") or \
                    kwargs['frequency']=="seasonnal" or kwargs['frequency']=="annual_cycle" )) :
                    # local data (formerly tested the reverse condition,
                    # which listed remote files without their prefix)
                    if not remote_prefix and \
                       ( (l.find("${variable}")>=0) or variable=='*' or \
                         fileHasVar(f,variable) or (variable != altvar and fileHasVar(f,altvar)) ) :
                        clogger.debug("adding fixed field :" + f)
                        add_file(f)
                    # remote data
                    elif remote_prefix:
                        if (l.split(":")[-1].find("${variable}")>=0) or variable=='*' or \
                           (variable != altvar and (f.find(altvar)>=0) ):
                            clogger.debug("adding fixed field :" +
                                          remote_prefix + f)
                            add_file(remote_prefix + f)
                        else:
                            raise Climaf_Data_Error(
                                "For remote files, filename pattern (%s) should include ${varname} (which is instanciated by variable name or filenameVar)"
                                % f)
                else:
                    clogger.info(
                        "Cannot yet filter files re. time using only file content."
                    )
                    add_file(f)

            if (fperiod and period.intersects(fperiod)) or not regexp:
                clogger.debug(
                    'Period is OK - Considering variable filtering on %s and %s for %s'
                    % (variable, altvar, f))
                # Filter against variable
                if (l.find("${variable}") >= 0):
                    clogger.debug(
                        'appending %s based on variable in filename' % f)
                    add_file(remote_prefix + f)
                    continue
                if (f not in rep):
                    # local data
                    if not remote_prefix and \
                       (variable=='*' or "," in variable or fileHasVar(f,variable) or \
                        (altvar != variable and fileHasVar(f,altvar))) :
                        # Should check time period in the file if not regexp
                        clogger.debug(
                            'appending %s based on multi-var or var exists in file '
                            % f)
                        add_file(f)
                        continue
                    # remote data
                    elif remote_prefix:
                        if variable=='*' or "," in variable or \
                           (variable != altvar and (f.find(altvar)>=0) ):
                            # Should check time period in the file if not regexp
                            clogger.debug(
                                'appending %s based on multi-var or altvar ' %
                                (remote_prefix + f))
                            add_file(remote_prefix + f)
                            continue
                        else:
                            mess = "For remote files, filename pattern (%s) should include" % (
                                remote_prefix + f)
                            mess += " ${varname} (which is instanciated by variable name or filenameVar)"
                            raise Climaf_Data_Error(mess)
            else:
                if not fperiod:
                    clogger.debug('not appending %s because period is None ' %
                                  f)
                elif not period.intersects(fperiod):
                    clogger.debug(
                        'not appending %s because period doesn t intersect %s'
                        % (f, period))

        # Break on first url with any matching data
        if len(rep) > 0:
            clogger.debug('url %s does match for ' % l + repr(kwargs))
            break
    return rep
def selectGenericFiles(urls, **kwargs):
    """
    Select the files of a ``generic`` data organization.

    The returned list is composed of the files which :

    - match one of the patterns in ``urls`` once the pattern is instantiated
      with the values in kwargs, and
    - contain the ``variable`` provided in kwargs, and
    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory

    Example :

    >>> selectGenericFiles(project='my_project', model='my_model', simulation='lastexp',
    ...     variable='tas', period='1980',
    ...     urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc'])
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the
    argument names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and filenames
      do include the variable name, as this speeds up the search
    - YYYY, YYYYMM, YYYYMMDD : use it for indicating the start date of the
      period covered by each file, if this is applicable in the file naming;
      use it a second time for the end date, if applicable (otherwise the
      date alone is handed to ``init_period``)
    - wildcards '?' and '*' for matching respectively one and any number
      of characters

    Args:
      urls : iterable of pattern strings
      kwargs : must include ``period`` (a string, which is converted with
        ``init_period``, or an object providing ``intersects``) and
        ``variable``. Optional ``filenameVar`` replaces ``variable`` when
        instantiating ``${variable}`` in patterns; when it is set, patterns
        which do not include ``${variable}`` are skipped. Optional
        ``frequency`` : with value "fx" and a pattern without date keyword,
        files are filtered on variable only.

    Returns:
      the list of selected file names; a given pattern contributes each file
      at most once.
    """
    rep = []
    period = kwargs['period']
    if isinstance(period, str):
        period = init_period(period)
    # 'variable' is what is looked for in file content; kwargs['variable']
    # is what gets substituted in patterns
    variable = kwargs['variable']
    mustHaveVariable = False
    if "filenameVar" in kwargs and kwargs['filenameVar']:
        kwargs['variable'] = kwargs['filenameVar']
        mustHaveVariable = True
    #
    # Date keywords : glob wildcards and regexp capture groups
    date_globs = dict(YYYY="????", YYYYMM="??????", YYYYMMDD="????????")
    date_regexps = dict(YYYY="([0-9]{4})", YYYYMM="([0-9]{6})",
                        YYYYMMDD="([0-9]{8})")
    # Longest keyword first, otherwise replacing YYYY would mangle YYYYMMDD
    date_keys = sorted(date_regexps, reverse=True)
    #
    for l in urls:
        variable_in_name = l.find("${variable}") >= 0
        # There is no use to look for files which path is not specific
        # to the required variable when we know it should
        if mustHaveVariable and not variable_in_name:
            continue
        #
        # Instantiate keywords in pattern with attributes values
        template = Template(l).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for key in date_keys:
            temp2 = temp2.replace(key, date_globs[key])
        clogger.debug("Globbing on : " + temp2)
        lfiles = glob.glob(temp2)
        #
        # Construct regexp for extracting dates from filenames. It depends
        # only on the pattern, hence it is built once per url
        regexp = None
        hasEnd = False
        for key in date_keys:
            if template.find(key) >= 0:
                regexp = template.replace(key, date_regexps[key], 1)
                if regexp.find(key) >= 0:
                    # A second occurrence stands for the end date
                    hasEnd = True
                    regexp = regexp.replace(key, date_regexps[key], 1)
                break
        if regexp is not None:
            # Translate glob wildcards to their regexp equivalent
            regexp = regexp.replace("*", ".*").replace("?", r".")
        #
        # Analyze all filenames
        for f in lfiles:
            if regexp is None:
                # No date in file name : cannot filter on period
                if kwargs.get('frequency') == "fx":
                    keep = variable_in_name or fileHasVar(f, variable)
                    if keep:
                        clogger.debug("adding fixed field :" + f)
                else:
                    clogger.warning("Cannot yet filter files re. time using only file content. TBD")
                    keep = True
            else:
                # Filter file time period against required period
                start = re.sub(regexp, r'\1', f)
                if hasEnd:
                    end = re.sub(regexp, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
                # Then filter against variable (file content is inspected
                # only if the file name does not tell)
                keep = (fperiod and period.intersects(fperiod)) and \
                    (variable_in_name or fileHasVar(f, variable))
            if keep:
                rep.append(f)
    return rep
def selectGenericFiles(urls, **kwargs):
    """
    Select the files of a ``generic`` data organization.

    The returned list is composed of the files which :

    - match one of the patterns in ``urls`` once the pattern is instantiated
      with the values in kwargs, and
    - contain the ``variable`` provided in kwargs, and
    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory

    Example :

    >>> selectGenericFiles(project='my_project', model='my_model', simulation='lastexp',
    ...     variable='tas', period='1980',
    ...     urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc'])
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the
    argument names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and filenames
      do include the variable name, as this speeds up the search
    - YYYY, YYYYMM, YYYYMMDD : use it for indicating the start date of the
      period covered by each file, if this is applicable in the file naming;
      use it a second time for the end date, if applicable (otherwise the
      date alone is handed to ``init_period``)
    - wildcards '?' and '*' for matching respectively one and any number
      of characters

    Args:
      urls : iterable of pattern strings
      kwargs : must include ``period`` (a string, which is converted with
        ``init_period``, or an object providing ``intersects``) and
        ``variable``. Optional ``filenameVar`` is an alternate variable
        name : it is tried for instantiating ``${variable}`` when globbing
        with ``variable`` finds no file, and is also looked for in file
        content. Optional ``frequency`` : with value "fx", "seasonnal" or
        "annual_cycle" and a pattern without date keyword, files are
        filtered on variable only.

    Returns:
      the list of selected file names, each file appearing at most once.

    Raises:
      Climaf_Data_Error : if the pattern includes a date keyword and no
        date can be extracted from the name of a globbed file.
    """
    rep = []
    # Files already in rep, for not returning a file twice
    selected = set()
    period = kwargs['period']
    if isinstance(period, str):
        period = init_period(period)
    variable = kwargs['variable']
    filenameVar = kwargs.get('filenameVar')
    # An empty filenameVar is not an alternate name
    altvar = filenameVar or variable
    # a dict and an ordered list of date globbing patterns
    dt = dict(YYYY="????", YYYYMM="??????", YYYYMMDD="????????")
    # Longest keyword first, otherwise replacing YYYY would mangle YYYYMMDD
    lkeys = sorted(dt, reverse=True)
    # a dict and an ordered list for matching dates
    dr = dict(YYYY="([0-9]{4})", YYYYMM="([0-9]{6})", YYYYMMDD="([0-9]{8})")
    rkeys = sorted(dr, reverse=True)
    # Frequencies for which no filtering on period applies
    static_fields = kwargs.get('frequency') in ("fx", "seasonnal", "annual_cycle")
    #
    for l in urls:
        variable_in_name = l.find("${variable}") >= 0
        # Instantiate keywords in pattern with attributes values
        template = Template(l).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in lkeys:
            temp2 = temp2.replace(k, dt[k])
        lfiles = glob.glob(temp2)
        clogger.debug("Globbing %d files for varname on %s : " % (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar. This uses a
        # copy of kwargs, so that next urls are still tried with varname first
        if len(lfiles) == 0 and filenameVar:
            alt_kwargs = dict(kwargs, variable=filenameVar)
            template = Template(l).safe_substitute(**alt_kwargs)
            temp2 = template
            for k in lkeys:
                temp2 = temp2.replace(k, dt[k])
            lfiles = glob.glob(temp2)
            clogger.debug("Globbing %d files for filenamevar on %s: " % (len(lfiles), temp2))
        #
        # Construct regexp for extracting dates from filename
        regexp0 = None
        hasEnd = False
        for key in rkeys:
            if template.find(key) >= 0:
                regexp0 = template.replace(key, dr[key], 1)
                if regexp0.find(key) >= 0:
                    # A second occurrence stands for the end date
                    hasEnd = True
                    regexp0 = regexp0.replace(key, dr[key], 1)
                break
        if regexp0 is not None:
            # Translate glob wildcards to their regexp equivalent
            regexp0 = regexp0.replace("*", ".*").replace("?", r".")
        #
        for f in lfiles:
            keep_whatever_variable = False
            if regexp0 is None:
                # No date in file name : cannot filter on period
                if not static_fields:
                    clogger.warning("Cannot yet filter files re. time using only file content. TBD")
                    keep_whatever_variable = True
            else:
                # Analyze file time period; re.sub returns its input
                # unchanged when the regexp does not match
                start = re.sub(regexp0, r'\1', f)
                if start == f:
                    raise Climaf_Data_Error("Start period not found")
                if hasEnd:
                    end = re.sub(regexp0, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
                #
                # Filter file time period against required period
                if not fperiod:
                    clogger.debug('not appending %s because period is None ' % f)
                    continue
                if not period.intersects(fperiod):
                    clogger.debug('not appending %s because period doesn t intersect %s' % (f, period))
                    continue
            clogger.debug('Period is OK - Considering variable filtering on %s and %s for %s' %
                          (variable, altvar, f))
            if f in selected:
                continue
            #
            # Filter against variable, cheapest tests first : file content
            # is inspected only if nothing else tells
            if keep_whatever_variable:
                pass
            elif variable_in_name:
                clogger.debug('appending %s based on variable in filename' % f)
            elif ("," in variable) or fileHasVar(f, variable) or \
                    (altvar != variable and fileHasVar(f, altvar)):
                # Should check time period in the file if not regexp
                clogger.debug('appending %s based on multi-var or var exists in file ' % f)
            else:
                continue
            selected.add(f)
            rep.append(f)
    return rep
def variability_AR5(model, realization, variable, table, data_versions, season="ANN",
                    project="CMIP6", operator=None, operator_args=None,
                    post_operator=None, post_operator_args=None,
                    shift=100, nyears=20, number=20, variability=True,
                    compute=True, house_keeping=False, detrend=True, deep=None):
    """
    Compute the variability according to AR5 Box 2.1 :

    - select data time series in piControl for the whole of the samples
      (from its begin+SHIFT, duration consistent with NUMBER samples of
      size NYEARS); the data variant and version, and the begin date, are
      selected according to dictionary DATA_VERSIONS, which is indexed as
      DATA_VERSIONS["piControl"][variable][table][model][realization] and
      provides a triplet (grid, version, data_period); if REALIZATION is not
      a key for that model, the first available realization is used

    - transform this data using OPERATOR (and its OPERATOR_ARGS) that
      should produce one value per year (default being to compute annual
      or seasonal means, according to SEASON)

    - detrend that data, if DETREND is set (this is done by default)

    - build an ensemble representing the samples (NUMBER * NYEARS)

    - transform each member's result using POST_OPERATOR and
      POST_OPERATOR_ARGS (default is to compute a time average)

    - if arg VARIABILITY is false, returns that result (i.e. by default
      the time mean),

    - otherwise computes and returns the variability as the ensemble
      standard deviation multiplied by square root of 2

    If the data period is too short for applying SHIFT, the begin date is
    moved backward as far as needed, but not before the data period start;
    ValueError is raised if the data period is too short even with no shift.

    OPERATOR_ARGS and POST_OPERATOR_ARGS are dictionaries of keyword
    arguments; None stands for no argument.

    The returned value is a CliMAF object (either a field or an ensemble,
    depending on VARIABILITY)

    Arg COMPUTE, if set to True, drives an immediate launch of the
    computation of the CliMAF object, through cfile, which is also passed
    arg DEEP.

    Arg HOUSE_KEEPING, if set to True, allows to release CliMAF cache
    intermediate results, to keep cache use as low as possible

    Used e.g for variability of :

    - plain variables
    - walsh seasonnality index
    - number of dry days per year
    - year mean daily precipitation for non-dry days
    - inter_annual variability for any variable, using :

      * post_operator=inter_annual_variability
      * post_operator_args={"factor" : 1.414}

    This version yet tested only on CMIP6 models
    """
    init_trend()
    from climaf.operators import ctrend, csubtrend

    if operator_args is None:
        operator_args = {}
    if post_operator_args is None:
        post_operator_args = {}
    annual = season in ["ann", "ANN", "anm"]

    versions = data_versions["piControl"][variable][table][model]
    if realization not in versions:
        # Fall back to the first realization listed for that model
        realization = next(iter(versions))
    grid, version, data_period = versions[realization]

    duration = nyears * number
    # Only the year part of the data period bounds is used
    true_begin = int(data_period.split('-')[0][0:4])
    end = int(data_period.split('-')[1][0:4])
    begin = true_begin + shift
    if begin + duration - 1 > end:
        # In CMIP6, some models have enough spinup before piControl start,
        # but a too short piControl length
        # We assume that this has been dealt with at the stage of data selection, and allow
        # to release the constraint on shift at the beginning of the data period
        alt_begin = end - duration + 1
        if alt_begin >= true_begin:
            begin = alt_begin
        else:
            message = "Duration for %s %s %s %s %s %s is too short : [%d - %d] even with no shift %d is shorter than %d years " % \
                (model, variable, table, realization, version, grid, true_begin, end, shift, duration)
            raise ValueError(message)
    #
    period = "%g-%g" % (begin, begin + duration - 1)
    base_dict = dict(project=project, experiment="piControl", model=model,
                     institute=institute_for_model(model), period=period,
                     variable=variable, table=table, version=version,
                     grid=grid, realization=realization)
    if project == "CMIP6":
        base_dict.update(mip="CMIP")

    # Basic dataset (e.g. precip)
    basic = ds(**base_dict)

    # Implement the operation if required, otherwise seasonal or yearly average
    if operator is None:
        if annual:
            dat_op = ccdo(basic, operator="yearmean")
        else:
            dat_op = ccdo_fast(basic, operator="selseason,%s -seasmean" % season)
    elif annual:
        dat_op = operator(basic, **operator_args)
    else:
        dat_season = ccdo_fast(basic, operator="selseason,%s" % season)
        dat_op = operator(dat_season, **operator_args)
    dat = dat_op

    # Detrend the data if required
    if detrend:
        a = ctrend(dat)
        # Do not want to have a zero-mean detrended serie
        ap = ccdo_fast(a, operator="mulc,0")
        detrended = csubtrend(dat, ap, a.b)
        dat = detrended

    # Build an ensemble which members are the slices
    econtrol = cens()
    slices = ["%d-%d" % (begin + n * nyears, begin + (n + 1) * nyears - 1)
              for n in range(number)]
    for slice_period in slices:
        econtrol[slice_period] = ccdo_fast(
            dat, operator="seldate," + init_period(slice_period).iso())

    # On each slice, implement the required post operation, otherwise compute a plain average
    if post_operator is not None:
        cmeans = cens()
        for member in econtrol:
            cmeans[member] = post_operator(econtrol[member], **post_operator_args)
    else:
        cmeans = ccdo_fast(econtrol, operator="timmean")

    # The same truthiness test is used here and below, so that 'variab'
    # is always defined when it is used
    if variability:
        # Compute variability over the slices ensemble
        variab1 = ccdo_ens(cmeans, operator='ensstd1')
        variab = ccdo_fast(variab1, operator="mulc,1.414")  # cf. AR5 Box 2.1
    #
    if compute:
        if variability:
            cfile(variab, deep=deep)
        else:
            cfile(cmeans, deep=deep)
        # NOTE(review): intermediate data is assumed to be released only
        # when COMPUTE is set; the nesting was reconstructed from a
        # flattened source - confirm
        if house_keeping:
            # Discard intermediate data
            cdrop(basic)
            cdrop(dat_op)
            if operator is not None and not annual:
                cdrop(dat_season)
            if detrend:
                cdrop(a)
                cdrop(a.b)
                cdrop(ap)
                cdrop(detrended)
            cdrop(dat)
            for slice_period in slices:
                cdrop(econtrol[slice_period])
    #
    if variability:
        if house_keeping:
            cdrop(cmeans)
            cdrop(variab1)
        return variab
    return cmeans