def capply_operator(climaf_operator, *operands, **parameters):
    """
    Build the object representing the application of an internal
    OPERATOR to OPERANDS, with keyword PARAMETERS.

    Not implemented yet: logs an error and returns None.
    """
    clogger.error("Not yet developped - TBD")
    return None
def ds(*args, **kwargs):
    """
    Returns a dataset from its full Climate Reference Syntax string. Example::

     >>> ds('CMIP5.historical.pr.[1980].global.monthly.CNRM-CM5.r1i1p1.mon.Amon.atmos.last')

    Also a shortcut for :py:meth:`~climaf.classes.cdataset`, when used with
    only keyword arguments. Example::

     >>> cdataset(project='CMIP5', model='CNRM-CM5', experiment='historical', frequency='monthly',\
     simulation='r2i3p9', domain=[40,60,-10,20], variable='tas', period='1980-1989', version='last')

    Raises Climaf_Classes_Error when the CRS string matches no project, or
    more than one.
    """
    # Note : must be kept phased with self.crs defined in cdataset.init(),
    # both for function name and CRS syntax
    if len(args) > 1:
        raise Climaf_Classes_Error("Must provide either only a string or only keyword arguments")
    if len(args) == 0:
        # Front-end to cdataset
        return cdataset(**kwargs)
    crs = args[0]
    results = []
    # Try to interpret the CRS string against every declared project
    for cproj in cprojects:
        try:
            dataset = cprojects[cproj].crs2ds(crs)
        except Climaf_Classes_Error:
            dataset = None
        if dataset:
            results.append(dataset)
    if len(results) > 1:
        e = "CRS expressions %s is ambiguous among projects %s" % (crs, repr(cprojects.keys()))
        clogger.error(e)
        raise Climaf_Classes_Error(e)
    elif len(results) == 0:
        e = "CRS expressions %s is not valid for any project in %s" % (crs, repr(cprojects.keys()))
        # Fixed: previous version had an unreachable 'return None' after this raise
        raise Climaf_Classes_Error(e)
    return results[0]
def getCRS(filename):
    """Return the CRS expression stored in FILENAME's metadata.

    Supported file types: NetCDF (.nc), PNG, PDF and EPS; each is probed with
    the relevant command-line tool.  Returns None for an unknown file type,
    and the string "failed" when the probe command itself fails.
    """
    import subprocess
    if re.findall(".nc$", filename):
        template = 'ncdump -h %s | grep -E "CRS_def *=" | ' + \
                   'sed -r -e "s/.*:CRS_def *= *\\\"(.*)\\\" *;$/\\1/" '
    elif re.findall(".png$", filename):
        template = 'identify -verbose %s | grep -E " *CRS_def: " | sed -r -e "s/.*CRS_def: *//"'
    elif re.findall(".pdf$", filename):
        template = 'pdfinfo %s | grep "Keywords" | awk -F ":" \'{print $2}\' | sed "s/^ *//g"'
    elif re.findall(".eps$", filename):
        template = 'exiv2 -p x %s | grep "CRS_def" | awk \'{for (i=4;i<=NF;i++) {print $i " "} }\' '
    else:
        clogger.error("unknown filetype for %s" % filename)
        return None
    command = template % filename
    try:
        crs_expr = subprocess.check_output(command, shell=True).replace('\n', '')
        # An empty answer means the stamp is missing (tolerated for Empty.png)
        if crs_expr == "" and ('Empty.png' not in filename):
            clogger.error("file %s is not well formed (no CRS)" % filename)
        if re.findall(".nc$", filename):
            crs_expr = crs_expr.replace(r"\'", r"'")
    except:
        crs_expr = "failed"
    clogger.debug("CRS expression read in %s is %s" % (filename, crs_expr))
    return crs_expr
def capply(climaf_operator, *operands, **parameters):
    """Build the object representing the application of a CliMAF operator
    (script, function or macro) to OPERANDS with keyword PARAMETERS.

    Returns the result(s) as CliMAF object(s), storing them if auto-store is
    on; returns None for an unknown operator.
    """
    if operands is None or operands[0] is None:
        raise Climaf_Driver_Error("Operands is None")
    res = None
    opds = map(str, operands)
    if climaf_operator in operators.scripts:
        # clogger.debug("applying script %s to" % climaf_operator + repr(opds) + repr(parameters))
        res = capply_script(climaf_operator, *operands, **parameters)
        # Evaluate object right now if there is no output to manage
        script = operators.scripts[climaf_operator]
        if script.outputFormat is None:
            ceval(res, userflags=copy.copy(script.flags))
    elif climaf_operator in cmacro.cmacros:
        if len(parameters) > 0:
            raise Climaf_Driver_Error("Macros cannot be called with keyword args")
        clogger.debug("applying macro %s to" % climaf_operator + repr(opds))
        res = cmacro.instantiate(cmacro.cmacros[climaf_operator], *operands)
    elif climaf_operator in operators.operators:
        clogger.debug("applying operator %s to" % climaf_operator + repr(opds) + repr(parameters))
        res = capply_operator(climaf_operator, *operands, **parameters)
    else:
        clogger.error("%s is not a known operator nor script" % climaf_operator)
    return res
def register(filename, crs):
    """Stamp FILENAME with metadata 'CRS_def' (value CRS) and 'CliMAF'
    (CliMAF version and reference URL), then record it in dict crs2filename.

    Waits shortly for the file to appear (file-system inode-table latency);
    silently skips non-existing files.  Returns True on success, None on
    stamping failure.
    """
    # First read index from file if it is yet empty - No : done at startup
    # if len(crs2filename.keys()) == 0 : cload()
    # Give the file system up to ~2s to expose the freshly written file
    attempts = 0
    while attempts < 20 and not os.path.exists(filename):
        time.sleep(0.1)
        attempts += 1
    if not os.path.exists(filename):
        clogger.error("file %s does not exist (for crs %s)" % (filename, crs))
        return
    if re.findall(".nc$", filename):
        command = "ncatted -h -a CRS_def,global,o,c,\"%s\" -a CliMAF,global,o,c,\"CLImate Model Assessment Framework version %s (http://climaf.rtfd.org)\" %s" % \
            (crs, version, filename)
    if re.findall(".png$", filename):
        command = "convert -set \"CRS_def\" \"%s\" -set \"CliMAF\" \"CLImate Model Assessment Framework version %s (http://climaf.rtfd.org)\" %s %s.png && mv -f %s.png %s" % \
            (crs, version, filename, filename, filename, filename)
    clogger.debug("trying stamping by %s" % command)
    if os.system(command) == 0:
        crs2filename[crs] = filename
        clogger.info("%s registered as %s" % (crs, filename))
        return True
    clogger.critical("cannot stamp by %s" % command)
    return None
def cprotect(obj, stop=False):
    """Protect (or, with stop=True, un-protect) the cache file of a CliMAF
    object by toggling its write permission.

    In order to erase a protected file, argument 'force=True' must then be
    used with :py:func:`~climaf.cache.craz` or :py:func:`~climaf.cache.cdrop`.
    """
    if isinstance(obj, cobject):
        crs = repr(obj)
        # Datasets are cached as the result of an implicit 'select'
        if isinstance(obj, cdataset):
            crs = "select(" + crs + ")"
    elif type(obj) is str:
        crs = obj
    else:
        clogger.error("%s is not a CliMAF object" % repr(obj))
        return
    if crs not in crs2filename:
        clogger.info("%s is not (yet) cached; use cfile() to cache it" % crs)
        return
    if stop is False:
        clogger.info("Protecting cached value for " + crs)
        os.system("chmod -w " + crs2filename[crs])
    else:
        clogger.info("Stopping protection on cached value for " + crs)
        os.system("chmod +w " + crs2filename[crs])
def isCached(self):
    """ TBD : analyze if a remote dataset is locally cached """
    # Not implemented yet: remote datasets are never reported as cached
    clogger.error("TBD - remote datasets are not yet cached")
    return False
def register(filename, crs):
    """Stamp FILENAME with a 'CRS_def' metadata holding CRS and record it in
    dict crs2filename.  Silently skips non-existing files.  Returns True on
    success, None on stamping failure.
    """
    # Load the on-disk index first if the in-memory one is still empty
    if len(crs2filename.keys()) == 0:
        cload()
    # Allow the file system some time for updating its inode tables
    tries = 0
    while tries < 10 and not os.path.exists(filename):
        time.sleep(0.5)
        tries += 1
    time.sleep(0.5)
    if not os.path.exists(filename):
        clogger.error("file %s does not exist (for crs %s)" % (filename, crs))
        return
    if re.findall(".nc$", filename):
        command = "ncatted -h -a CRS_def,global,o,c,\"%s\" %s" % (crs, filename)
    if re.findall(".png$", filename):
        command = "convert -set \"CRS_def\" \"%s\" %s %s.png && mv -f %s.png %s" % \
            (crs, filename, filename, filename, filename)
    clogger.debug("trying stamping by %s" % command)
    if os.system(command) == 0:
        crs2filename[crs] = filename
        clogger.info("%s registered as %s" % (crs, filename))
        return True
    clogger.critical("cannot stamp by %s" % command)
    return None
def selectEmFiles(**kwargs):
    """Select EM data files matching the 'simulation', 'frequency',
    'variable', 'period' and 'realm' keyword arguments.

    Realms are identified by a single letter; '*' scans A, L, O and I.
    Returns the list of matching file paths.
    """
    # For A and L : mon, day1, day2, 6hLev, 6hPlev, 3h
    simulation = kwargs['simulation']
    variable = kwargs['variable']
    period = kwargs['period']
    # Map frequency to the suffix used in EM directory variable names
    freq_suffixes = {"mon": "", "3h": "_3h"}
    freq = kwargs['frequency']
    if freq in freq_suffixes:
        freq = freq_suffixes[freq]
    realm = kwargs['realm']
    # Must look for all realms, here identified by a single letter
    realms = ["A", "L", "O", "I"] if realm == "*" else [realm]
    paths = []
    for realm in realms:
        clogger.debug("Looking for realm " + realm)
        # Use EM data for finding data dir; 'I' is a special case (no suffix)
        em_freq = "" if realm == 'I' else freq
        command = [
            "grep",
            "^export EM_DIRECTORY_" + realm + em_freq + "=",
            os.path.expanduser(os.getenv("EM_HOME")) + "/expe_" + simulation
        ]
        try:
            proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except:
            clogger.error("Issue getting archive_location for " + simulation +
                          " for realm " + realm + " with: " + repr(command))
            break
        if proc.wait() != 0:
            clogger.info("No archive location found for " + simulation +
                         " for realm " + realm + " with: " + repr(command))
            continue
        data_dir = proc.stdout.read().split("=")[1].replace('"', "").replace("\n", "")
        clogger.debug("Looking at dir " + data_dir)
        if not os.path.exists(data_dir):
            clogger.error(
                "Directory %s does not exist for simulation %s, realm %s "
                "and frequency %s" % (data_dir, simulation, realm, freq))
            continue
        for entry in os.listdir(data_dir):
            file_period = periodOfEmFile(entry, realm, freq)
            if file_period and period.intersects(file_period):
                if fileHasVar(data_dir + "/" + entry, variable):
                    paths.append(data_dir + "/" + entry)
    return paths
def csync(update=False):
    """Merge the in-memory cache index with the on-disk one, and write the
    merged index back to disk.

    If arg `update` is True, additionally ensures consistency between the
    files actually present in cache and the index content, either:

    - if cache.stamping is True, by re-reading the CRS in all files,
    - else nothing destructive is done (there is no reliable way to identify
      a CRS from an unstamped file).

    Fix over previous version: the error report on index-write failure used
    `len(files_in_cache > 0)` (comparing a list with an int), which itself
    raised instead of logging.
    """
    global cacheIndexFileName
    global dropped_crs
    # Merge index on file and index in memory, discarding dropped entries
    file_index = cload(True)
    for crs in dropped_crs:
        file_index.pop(crs, None)
    crs2filename.update(file_index)
    # Check if cache index is up to date; if not enforce consistency
    if update:
        clogger.info("Listing crs from files present in cache")
        files_in_cache = sorted(list_cache())
        files_in_index = sorted(crs2filename.values())
        if files_in_index != files_in_cache:
            if stamping:
                clogger.info("Rebuilding cache index from file content")
                rebuild()
            else:
                clogger.warning(
                    'In no stamp mode, there is no way to seriously identify CRS from files in cache !'
                )
    # Save index to disk
    fn = os.path.expanduser(cacheIndexFileName)
    try:
        with open(fn, "w") as cacheIndexFile:
            pickle.dump(crs2filename, cacheIndexFile)
        dropped_crs = []
    except:
        # Only report when we know there really are cached files to index
        if update and os.path.isfile(fn) and len(files_in_cache) > 0:
            clogger.error("Issue when writing cache index %s" % fn)
def __init__(self, op, command, canOpendap=False, canSelectVar=False,
             canSelectTime=False, canSelectDomain=False, canAggregateTime=False,
             canAlias=False, canMissing=False, commuteWithEnsemble=False):
    """Placeholder constructor: not implemented yet, only logs an error.

    The parameters mirror the capability flags used by CliMAF scripts
    (OpenDAP support, variable/time/domain selection, time aggregation,
    aliasing, missing-value handling, ensemble commutation), but none of
    them is stored — the instance is left un-initialized.
    """
    clogger.error("Not yet developped")
def cimport(cobject, crs):
    """Import an external result into CliMAF under the CRS expression CRS.

    COBJECT may be a numpy MaskedArray (not implemented yet) or a filename,
    which is then registered in the cache under CRS.

    Fixes over previous version: the clogger.debug/error calls passed
    `cobject` as an extra argument without a matching '%s' placeholder,
    which made the logging module raise a formatting error; also fixed the
    'fro'/'for' typo in the error message.
    """
    clogger.debug("cimport called with argument %s", cobject)
    clogger.debug("should check syntax of arg 'crs' -TBD")
    clogger.warning("cimport is not for the dummies - Playing at your own risks !")
    import numpy
    import numpy.ma
    if isinstance(cobject, numpy.ma.MaskedArray):
        clogger.debug("for now, use a file for importing - should revisit - TBD")
        clogger.error("not yet implemented for Masked Arrays - TBD")
    elif isinstance(cobject, str):
        cache.register(cobject, crs)
    else:
        clogger.error("argument is not a Masked Array nor a filename : %s", cobject)
def cload_for_project(project):
    """Append to the cache index dict those pending index entries for
    'project' whose CRS expression now evaluates successfully."""
    pending = crs_not_yet_evaluable[project]
    # Iterate over a copy: successfully evaluated entries are removed on the fly
    for crs in pending.copy():
        try:
            eval(crs, sys.modules['__main__'].__dict__)
        except:
            clogger.error("CRS expression %s is not valid for project %s" % (crs, project))
        else:
            crs2filename[crs] = pending[crs]
            pending.pop(crs)
def cload_for_project(project):
    """Try to evaluate every pending cache index entry of 'project';
    entries whose CRS evaluates are moved into crs2filename."""
    not_evaluated = crs_not_yet_evaluable[project]
    for crs, cached_file in not_evaluated.copy().items():
        try:
            eval(crs, sys.modules['__main__'].__dict__)
            crs2filename[crs] = cached_file
            not_evaluated.pop(crs)
        except:
            clogger.error("CRS expression %s is not valid for project %s" % (crs, project))
def timePeriod(cobject):
    """Return a time period for a CliMAF object: a dataset's own period, or
    (recursively) the period of the first operand / father / first member."""
    if isinstance(cobject, classes.cdataset):
        return cobject.period
    if isinstance(cobject, classes.ctree):
        clogger.debug("for now, timePeriod logic for scripts output is basic (1st operand) - TBD")
        return timePeriod(cobject.operands[0])
    if isinstance(cobject, classes.scriptChild):
        clogger.debug("for now, timePeriod logic for scriptChilds is basic - TBD")
        return timePeriod(cobject.father)
    if isinstance(cobject, classes.cens):
        clogger.debug("for now, timePeriod logic for 'cens' objet is basic (1st member)- TBD")
        return timePeriod(cobject.members[0])
    clogger.error("unkown class for argument " + repr(cobject))
def selectEmFiles(**kwargs):
    """Return the list of EM data files matching the 'simulation',
    'frequency', 'variable', 'period' and 'realm' keyword arguments
    (realm '*' scans the single-letter realms A, L, O and I)."""
    # Pour A et L : mon, day1, day2, 6hLev, 6hPlev, 3h
    simulation = kwargs['simulation']
    frequency = kwargs['frequency']
    variable = kwargs['variable']
    period = kwargs['period']
    realm = kwargs['realm']
    # Frequency keyword -> suffix of the EM directory environment variable
    freqs = {"mon": "", "3h": "_3h"}
    f = freqs.get(frequency, frequency)
    rep = []
    # Must look for all realms, here identified by a single letter
    lrealm = ["A", "L", "O", "I"] if realm == "*" else [realm]
    for realm in lrealm:
        clogger.debug("Looking for realm " + realm)
        # Use EM data for finding data dir ('I' is a special case: no suffix)
        freq_for_em = "" if realm == 'I' else f
        command = ["grep",
                   "^export EM_DIRECTORY_" + realm + freq_for_em + "=",
                   os.path.expanduser(os.getenv("EM_HOME")) + "/expe_" + simulation]
        try:
            ex = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except:
            clogger.error("Issue getting archive_location for " +
                          simulation + " for realm " + realm + " with: " + repr(command))
            break
        if ex.wait() == 0:
            dir = ex.stdout.read().split("=")[1].replace('"', "").replace("\n", "")
            clogger.debug("Looking at dir " + dir)
            if os.path.exists(dir):
                for fil in os.listdir(dir):
                    fileperiod = periodOfEmFile(fil, realm, f)
                    if fileperiod and period.intersects(fileperiod):
                        if fileHasVar(dir + "/" + fil, variable):
                            rep.append(dir + "/" + fil)
            else:
                clogger.error("Directory %s does not exist for EM simulation %s, realm %s "
                              "and frequency %s" % (dir, simulation, realm, f))
        else:
            clogger.info("No archive location found for " +
                         simulation + " for realm " + realm + " with: " + repr(command))
    return rep
def complement(crsb, crse, crs):
    """Concatenate (with ncrcat) the cached files of CRSB (B for 'begin') and
    CRSE (E for 'end') into a new cached file for CRS, then drop the two
    partial objects.  Assumes that everything is OK with args compatibility
    and file contents.  Returns the new file name, or None on failure."""
    target = generateUniqueFileName(crs)
    command = "ncrcat -O %s %s %s" % (crs2filename[crsb], crs2filename[crse], target)
    if os.system(command) != 0:
        clogger.error("Issue when merging %s and %s in %s (using command:%s)" %
                      (crsb, crse, crs, command))
        return None
    cdrop(crsb)
    cdrop(crse)
    register(target, crs)
    return target
def complement(crsb, crse, crs):
    """Extend the time period of the cached file for CRSB ('begin') with the
    cached file for CRSE ('end'), producing the cached file for CRS.
    Assumes args compatibility and consistent file contents.  Returns the
    merged file name, or None when ncrcat fails."""
    begin_file = crs2filename[crsb]
    end_file = crs2filename[crse]
    merged_file = generateUniqueFileName(crs)
    command = "ncrcat -O %s %s %s" % (begin_file, end_file, merged_file)
    if os.system(command) == 0:
        # Partial objects are no longer needed once merged
        cdrop(crsb)
        cdrop(crse)
        register(merged_file, crs)
        return merged_file
    clogger.error("Issue when merging %s and %s in %s (using command:%s)" % (crsb, crse, crs, command))
    return None
def cdrop(obj, rm=True):
    """
    Delete the cached file for a CliMAF object, if it exists

    Args:
        obj (cobject or string): object to delete, or its string
            representation (CRS)
        rm (bool): for advanced use only; should we actually delete (rm)
            the file, or just forget it in the CliMAF cache index

    Returns:
        None if object does not exist, False if failing to delete,
        True if OK

    Example::

        >>> dg=ds(project='example', simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
        >>> f=cfile(dg)
        >>> os.system('ls -al '+f)
        >>> cdrop(dg)
    """
    global crs2filename
    if isinstance(obj, cobject):
        crs = repr(obj)
        # Datasets are cached as the result of an implicit 'select'
        if isinstance(obj, cdataset):
            crs = "select(" + crs + ")"
    elif type(obj) is str:
        crs = obj
    else:
        clogger.error("%s is not a CliMAF object" % repr(obj))
        return
    if crs not in crs2filename:
        clogger.info("%s is not cached" % crs)
        return None
    clogger.info("discarding cached value for " + crs)
    cached_file = crs2filename.pop(crs)
    if rm:
        try:
            os.remove(cached_file)
            return True
        except:
            clogger.warning("When trying to remove %s : file does not exist in cache" % crs)
            return False
def ceval_select(includer, included, userflags, format, deep, derived_list, recurse_list):
    """Extract object INCLUDED from the (already computed) object INCLUDER,
    taking into account the capability of the user process (USERFLAGS) and
    the required delivery FORMAT ('file' or object).  Only the 'file' format
    is handled yet."""
    if format != 'file':
        clogger.error("Can yet process only files - TBD")
        return
    if userflags.canSelectTime or userflags.canSelectDomain:
        clogger.debug("TBD - should do smthg smart when user can select time or domain")
        # includer.setperiod(included.period)
    incperiod = timePeriod(included)
    clogger.debug("extract sub period %s out of %s" % (repr(incperiod), includer.crs))
    extract = capply('select', includer, period=repr(incperiod))
    objfile = ceval(extract, userflags, 'file', deep, derived_list, recurse_list)
    if objfile:
        crs = includer.buildcrs(period=incperiod)
        return cache.rename(objfile, crs)
    clogger.critical("Cannot evaluate " + repr(extract))
def getCRS(filename):
    """Return the CRS expression found in FILENAME's metadata (NetCDF or PNG
    files only); None for an unknown file type, "failed" when the probe
    command fails."""
    import subprocess
    if re.findall(".nc$", filename):
        template = ('ncdump -h %s | grep -E "CRS_def *=" | '
                    'sed -r -e "s/.*:CRS_def *= *\\\"(.*)\\\" *;$/\\1/" ')
    elif re.findall(".png$", filename):
        template = 'identify -verbose %s | grep -E " *CRS_def: " | sed -r -e "s/.*CRS_def: *//"'
    else:
        clogger.critical("unknown filetype for %s" % filename)
        return None
    command = template % filename
    try:
        crs_expr = subprocess.check_output(command, shell=True).replace('\n', '')
        # An empty answer means the stamp is missing
        if crs_expr == "":
            clogger.error("file %s is not well formed (no CRS)" % filename)
        if re.findall(".nc$", filename):
            crs_expr = crs_expr.replace(r"\'", r"'")
    except:
        crs_expr = "failed"
    clogger.debug("CRS expression read in %s is %s" % (filename, crs_expr))
    return crs_expr
def browse_tree(cobj, func, results):
    """Browse a CliMAF object's tree, accumulating in list 'results' the
    values returned by 'func' on each tree node or leaf (when not None).

    Fix over previous version: the accumulator was referenced as 'partial'
    (an undefined name) instead of the 'results' parameter, which raised
    NameError as soon as func returned a truthy value or a
    ctree/scriptChild/cpage node was browsed.
    """
    if isinstance(cobj, cdataset) or isinstance(cobj, cdummy):
        res = func(cobj)
        if res:
            results.append(res)
    elif isinstance(cobj, ctree):
        res = func(cobj.operator)
        if res:
            results.append(res)
        for op in cobj.operands:
            browse_tree(op, func, results)
    elif isinstance(cobj, scriptChild):
        browse_tree(cobj.father, func, results)
    elif isinstance(cobj, cpage):
        for line in cobj.fig_lines:
            for fig in line:
                browse_tree(fig, func, results)
    elif cobj is None:
        return
    else:
        clogger.error("Cannot yet handle object :%s", repr(cobj))
    return
def set_variable(obj, varname, format):
    """Change to VARNAME the variable name of OBJ, where FORMAT may be
    'file' or 'MaskedArray'; also set the variable long_name using CF
    convention (TBD).  Returns True on success, None on failure."""
    if obj is None:
        return None
    long_name = CFlongname(varname)
    if format == 'file':
        oldvarname = varOfFile(obj)
        if oldvarname != varname:
            # Rename the variable in place, then update its long_name
            renaming = "ncrename -v %s,%s %s >/dev/null 2>&1" % (oldvarname, varname, obj)
            if os.system(renaming) != 0:
                clogger.error("Issue with changing varname to %s in %s" % (varname, obj))
                return None
            clogger.debug("Varname changed to %s in %s" % (varname, obj))
            relabel = "ncatted -a long_name,%s,o,c,%s %s" % (varname, long_name, obj)
            if os.system(relabel) != 0:
                clogger.error("Issue with changing long_name for var %s in %s" % (varname, obj))
                return None
            return True
    elif format == 'MaskedArray':
        clogger.warning('TBD - Cannot yet set the varname for MaskedArray')
    else:
        clogger.error('Cannot handle format %s' % format)
def cload():
    """Load the cache index from disk into global dict crs2filename, then
    discard entries whose CRS no longer evaluates (e.g. operators from
    undeclared projects), reporting the projects involved."""
    global crs2filename
    if len(crs2filename) != 0:
        # NOTE(review): this error object is built but not raised, as in the
        # original code — presumably CliMAF error classes log on creation; confirm
        Climaf_Driver_Error("attempt to reset file index - would lead to inconsistency !")
    try:
        with open(os.path.expanduser(cacheIndexFileName), "r") as cacheIndexFile:
            crs2filename = pickle.load(cacheIndexFile)
    except:
        pass  # no index file yet
    inconsistents = []
    for crs in crs2filename.copy():
        # We may have some crs inherited from past sessions and for which
        # some operator may have become non-standard
        try:
            eval(crs, sys.modules['__main__'].__dict__)
        except:
            crs2filename.pop(crs)
            inconsistents.append(crs)
    # Analyze projects of inconsistent cache objects
    projects = set()
    for crs in inconsistents:
        for p in guess_projects(crs):
            if p not in cprojects:
                projects.add(p)
    if projects:
        clogger.error(
            "The cache has %d objects for non-declared projects %s.\n"
            "For using it, consider including relevant project(s) "
            "declaration(s) in ~/.climaf and restarting CliMAF.\n"
            "You can also declare these projects right now and call 'csync(True)'\n"
            "Or you can erase corresponding data by 'crm(pattern=...project name...)'" %
            (len(inconsistents), repr(list(projects))))
def domainOf(cobject):
    """Return a domain for a CliMAF object: a dataset's own domain (rendered
    as a comma-separated coordinate string, empty for 'global'), or
    (recursively) the domain of the first operand / father / first member."""
    if isinstance(cobject, classes.cdataset):
        if type(cobject.domain) is list:
            # Explicit coordinates: render as comma-separated integers
            return ",".join(["%d" % coord for coord in cobject.domain])
        if cobject.domain == "global":
            return ""
        return cobject.domain
    elif isinstance(cobject, classes.ctree):
        clogger.debug("For now, domainOf logic for scripts output is basic (1st operand) - TBD")
        return domainOf(cobject.operands[0])
    elif isinstance(cobject, classes.scriptChild):
        clogger.debug("For now, domainOf logic for scriptChilds is basic - TBD")
        return domainOf(cobject.father)
    elif isinstance(cobject, classes.cens):
        clogger.debug("for now, domainOf logic for 'cens' objet is basic (1st member)- TBD")
        return domainOf(cobject.members[0])
    else:
        clogger.error("Unkown class for argument " + repr(cobject))
def cread(datafile, varname=None):
    """Read DATAFILE: display it if it is a PNG, or return its content as a
    numpy masked array if it is a NetCDF file (using VARNAME, or the file's
    variable when VARNAME is None)."""
    import re
    if not datafile:
        return None
    if re.findall(".png$", datafile):
        # Graphics are just displayed
        subprocess.Popen(["display", datafile, "&"])
    elif re.findall(".nc$", datafile):
        clogger.debug("reading NetCDF file %s" % datafile)
        if varname is None:
            varname = varOfFile(datafile)
        if varname is None:
            return None
        from Scientific.IO.NetCDF import NetCDFFile as ncf
        fileobj = ncf(datafile)
        # import netCDF4
        # fileobj=netCDF4.Dataset(datafile)
        # Note taken from the CDOpy developper : .data is not backwards
        # compatible to old scipy versions, [:] is
        data = fileobj.variables[varname][:]
        fillv = fileobj.variables[varname]._FillValue
        import numpy.ma
        masked = numpy.ma.array(data, mask=(data == fillv))
        fileobj.close()
        return masked
    else:
        clogger.error("cannot yet handle %s" % datafile)
        return None
def __init__(self, name, command, format="nc", canOpendap=False,
             commuteWithTimeConcatenation=False, commuteWithSpaceConcatenation=False,
             canSelectVar=False, **kwargs):
    """
    Declare a script or binary as a 'CliMAF operator', and define a
    Python function with the same name.

    Args:
        name (str): name for the CliMAF operator.
        command (str): script calling sequence, according to the syntax
            described below.
        format (str): script output format -- either 'nc', 'png', 'pdf',
            'eps', 'None', 'graph' (which lets the user choose among
            'png', 'pdf' or 'eps') or 'txt' (text output is not managed
            by CliMAF, only displayed - e.g. for using 'ncdump -h');
            defaults to 'nc'
        canOpendap (bool, optional): is the script able to use OpenDAP
            URIs ? defaults to False
        commuteWithTimeConcatenation (bool, optional): can the operation
            commute with concatenation of time periods ? set it to True
            if the operator can be applied on time chunks separately, in
            order to allow for incremental computation / time chunking;
            defaults to False
        commuteWithSpaceConcatenation (bool, optional): can the operation
            commute with concatenation of space domains ? defaults to
            False
        **kwargs: keyword arguments with keys matching '<outname>_var',
            providing a format string for computing the variable name of
            output 'outname' from the first input variable name (e.g.
            ``output2_var='std_dev(%s)'``)

    Returns: None

    The calling-sequence pattern (arg 'command') ties python objects to
    formal arguments, which appear as ``${argument}``; e.g. ::

        >>> cscript('mycdo','cdo ${operator} ${in} ${out}')
        >>> tas_avg = mycdo(tas_ds,operator='timavg')

    Non-reserved arguments (like 'operator' above) are replaced by the
    keyword values used when invoking the operator.  Reserved keywords:

    - **in, in_<digit>, ins, ins_<digit>, mmin** : CliMAF-managed input
      filenames or URLs; 'ins...' carries multiple files in one quoted
      string; 'mmin' accepts an ensemble (first input stream only)
    - **var, var_<digit>** : variable name the script should select in
      the corresponding multi-variate input stream
    - **period, period_<digit>** : time period to select, written as
      YYYYMMDD-YYYYMMDD (interval [date1, date2[)
    - **period_iso, period_iso_<digit>** : same, with CDO conventions
      (ISO dates, comma separator, end date is 1 minute before date2)
    - **domain, domain_<digit>** : domain to select, written as
      'latmin,latmax,lonmin,lonmax'
    - **out, out_<word>** : CliMAF-provided output filenames (main and
      secondary outputs); without any, the script has only side effects
    - **crs** : the CliMAF Reference Syntax expression of the first
      input stream (useful for plot titles)
    - **alias** : on-the-fly re-scale/rename, replaced by
      'new_varname,file_varname,scale,offset'
    - **units, units_<digit>** : on-the-fly unit setting
    - **missing** : on-the-fly transformation of a constant to missing
    - **labels** : for ensembles, the '$'-separated member labels

    See :download:`standard_operators <../climaf/standard_operators.py>`
    for examples.
    """
    # Check that script name does not clash with an existing symbol
    if name in sys.modules['__main__'].__dict__ and name not in scripts:
        clogger.error("trying to define %s as an operator, "
                      "while it exists as smthing else" % name)
        return None
    if name in scripts:
        clogger.warning("Redefining CliMAF script %s" % name)
    #
    # Check now that script is executable
    scriptcommand = command.split(' ')[0].replace("(", "")
    ex = subprocess.Popen(['which', scriptcommand], stdout=subprocess.PIPE)
    if ex.wait() != 0:
        # NOTE(review): error object built but not raised, as in original code
        Climaf_Operator_Error("defining %s : command %s is not "
                              "executable" % (name, scriptcommand))
    executable = ex.stdout.read().replace('\n', '')
    #
    # Analyze input field keywords and populate dict attribute 'inputs'
    self.inputs = dict()
    commuteWithEnsemble = True
    it = re.finditer(
        r"\${(?P<keyw>(?P<mult>mm)?in(?P<serie>s)?(_(?P<n>([\d]+)))?)}",
        command)
    for oc in it:
        if oc.group("n") is not None:
            rank = int(oc.group("n"))
        else:
            rank = 0
        if rank in self.inputs:
            Climaf_Operator_Error(
                "When defining %s : duplicate declaration for input #%d" %
                (name, rank))
        serie = (oc.group("serie") is not None)
        multiple = (oc.group("mult") is not None)
        if multiple:
            if rank != 0:
                raise Climaf_Operator_Error(
                    "Only first operand may accept members")
            if serie:
                raise Climaf_Operator_Error("Operand %s cannot both accept"
                                            "members and files set" % oc.group("keyw"))
            commuteWithEnsemble = False
        self.inputs[rank] = (oc.group("keyw"), multiple, serie)
    if len(self.inputs) == 0:
        Climaf_Operator_Error(
            "When defining %s : command %s must include at least one of "
            "${in} ${ins} ${mmin} or ${in_..} ... for specifying how CliMAF"
            " will provide the input filename(s)" % (name, command))
    for i in range(len(self.inputs)):
        if i + 1 not in self.inputs and not (i == 0 and 0 in self.inputs):
            Climaf_Operator_Error(
                "When defining %s : error in input sequence for rank %d" %
                (name, i + 1))
    #
    # Check if command includes an argument allowing for
    # providing an output filename
    if command.find("${out") < 0:
        # Fixed: was 'format is not "txt"' (identity comparison on a string
        # literal, which only works by interning accident)
        if format != "txt":
            format = None
    #
    # Search in call arguments for keywords matching "<output_name>_var"
    # which may provide format strings for computing output variable names
    outvarnames = dict()
    pattern = r"^(.*)_var$"
    for p in kwargs:
        if re.match(pattern, p):
            outvarnames[re.findall(pattern, p)[0]] = kwargs[p]
    clogger.debug("outvarnames for script %s = %s" % (name, repr(outvarnames)))
    #
    # Analyze output names and associated variable names (or format
    # strings), and store them in attribute dict 'outputs'
    self.outputs = dict()
    it = re.finditer(r"\${out(_(?P<outname>[\w-]*))?}", command)
    for occ in it:
        outname = occ.group("outname")
        if outname is not None:
            if outname in outvarnames:
                self.outputs[outname] = outvarnames[outname]
            else:
                self.outputs[outname] = "%s"  # outname
        else:
            self.outputs[None] = outvarnames.get('', "%s")
            self.outputs[''] = outvarnames.get('', "%s")
    #
    # Derive capability flags from the keywords present in the command
    canSelectVar = canSelectVar or (command.find("${var}") > 0)
    canAggregateTime = (command.find("${ins}") > 0 or command.find("${ins_1}") > 0)
    canAlias = (command.find("${alias}") > 0)
    canMissing = (command.find("${missing}") > 0)
    canSelectTime = False
    if command.find("${period}") > 0 or command.find("${period_1}") > 0:
        canSelectTime = True
    if command.find("${period_iso}") > 0 or command.find("${period_iso_1}") > 0:
        canSelectTime = True
    canSelectDomain = (command.find("${domain}") > 0 or command.find("${domain_1}") > 0)
    #
    self.name = name
    self.command = command
    self.fixedfields = None
    self.flags = scriptFlags(canOpendap, canSelectVar, canSelectTime,
                             canSelectDomain, canAggregateTime, canAlias,
                             canMissing, commuteWithEnsemble,
                             commuteWithTimeConcatenation,
                             commuteWithSpaceConcatenation)
    if format in known_formats or format in graphic_formats or format in none_formats:
        self.outputFormat = format
    else:
        raise Climaf_Operator_Error(
            "Allowed formats yet are : 'object', 'nc', 'txt', %s" %
            ', '.join([repr(x) for x in graphic_formats]))
    scripts[name] = self
    # Init doc string for the operator
    doc = "CliMAF wrapper for command : %s" % self.command
    # Try to get a better doc string from colocated doc/ directory
    docfilename = os.path.dirname(__file__) + "/../doc/scripts/" + name + ".rst"
    try:
        docfile = open(docfilename)
        doc = docfile.read()
        docfile.close()
    except:
        pass
    #
    # Create a function named as requested, which will invoke
    # capply with that name and same arguments
    defs = 'def %s(*args,**dic) :\n """%s"""\n return driver.capply("%s",*args,**dic)\n' \
        % (name, doc, name)
    # exec(code, globals) call form works on both Python 2 and 3
    exec(defs, globals())
    exec("from climaf.operators import %s" % name, sys.modules['__main__'].__dict__)
    clogger.debug("CliMAF script %s has been declared" % name)
def register(filename, crs, outfilename=None):
    """
    Adds in FILE a metadata named 'CRS_def' and with value CRS, and a
    metadata 'CLiMAF' with CliMAF version and ref URL

    Records this FILE in dict crs2filename

    If OUTFILENAME is not None, FILENAME is a temporary file and it's
    OUTFILENAME which is recorded in dict crs2filename

    Returns True on success, None on stamping/move failure;
    silently skip non-existing files
    """
    # First read index from file if it is yet empty - No : done at startup
    # if len(crs2filename.keys()) == 0 : cload()
    # It appears that we have to let some time to the file system for updating its inode tables
    global dropped_crs
    if not stamping:
        # Stamping disabled : just record the file in the cache index
        clogger.debug('No stamping')
        crs2filename[crs] = filename
        return True
    # Wait up to ~5 seconds for the file to appear (filesystem latency)
    waited = 0
    while waited < 50 and not os.path.exists(filename):
        time.sleep(0.1)
        waited += 1
    # time.sleep(0.5)
    if os.path.exists(filename):
        # while time.time() < os.path.getmtime(filename) + 0.2 : time.sleep(0.2)
        # Build the stamping command according to the file type suffix
        if re.findall(".nc$", filename):
            command = "ncatted -h -a CRS_def,global,o,c,\"%s\" -a CliMAF,global,o,c,\"CLImate Model Assessment " \
                      "Framework version %s (http://climaf.rtfd.org)\" %s" % (crs, version, filename)
        if re.findall(".png$", filename):
            crs2 = crs.replace("%", "\%")
            command = "convert -set \"CRS_def\" \"%s\" -set \"CliMAF\" \"CLImate Model Assessment Framework version " \
                      "%s (http://climaf.rtfd.org)\" %s %s.png && mv -f %s.png %s" % \
                      (crs2, version, filename, filename, filename, filename)
        if re.findall(".pdf$", filename):
            tmpfile = str(uuid.uuid4())
            command = "pdftk %s dump_data output %s && echo -e \"InfoBegin\nInfoKey: Keywords\nInfoValue: %s\" >> %s " \
                      "&& pdftk %s update_info %s output %s.pdf && mv -f %s.pdf %s && rm -f %s" % \
                      (filename, tmpfile, crs, tmpfile, filename, tmpfile, filename, filename, filename, tmpfile)
        if re.findall(".eps$", filename):
            command = 'exiv2 -M"add Xmp.dc.CliMAF CLImate Model Assessment Framework version %s ' \
                      '(http://climaf.rtfd.org)" -M"add Xmp.dc.CRS_def %s" %s' % \
                      (version, crs, filename)
        clogger.debug("trying stamping by %s" % command)
        if os.system(command) == 0:
            if outfilename:
                # FILENAME was temporary : move it to its final location
                cmd = 'mv -f %s %s ' % (filename, outfilename)
                if os.system(cmd) == 0:
                    clogger.info("move %s as %s " % (filename, outfilename))
                    clogger.info("%s registered as %s" % (crs, outfilename))
                    crs2filename[crs] = outfilename
                    if crs in dropped_crs:
                        dropped_crs.remove(crs)
                    return True
                else:
                    # BUGFIX: the message lacked its '%s' placeholder, so this
                    # very logging call raised TypeError instead of reporting
                    clogger.critical("cannot move by %s" % cmd)
                    exit()
                    return None
            else:
                clogger.info("%s registered as %s" % (crs, filename))
                crs2filename[crs] = filename
                if crs in dropped_crs:
                    dropped_crs.remove(crs)
                return True
        else:
            clogger.critical("cannot stamp by %s" % command)
            exit()
            return None
    else:
        clogger.error("file %s does not exist (for crs %s)" % (filename, crs))
def __init__(self, valeur):
    """Keep the error payload, log the message, and fully dedent the logger."""
    self.valeur = valeur
    clogger.error(str(self))
    dedent(100)
def __init__(self, valeur):
    """Keep the error payload, log the message, and fully dedent the logger."""
    self.valeur = valeur
    clogger.error(str(self))
    cdedent(100)
def cview(datafile):
    """Display DATAFILE with ImageMagick's 'display' if it is a PNG file.

    Logs an error and returns None for any other file type.
    """
    if re.findall(".png$", datafile):
        # BUGFIX: "&" was passed as a literal argv element to 'display';
        # with a list argv there is no shell, so '&' is not a background
        # operator — and Popen is asynchronous anyway.
        subprocess.Popen(["display", datafile])
    else:
        clogger.error("cannot yet handle %s" % datafile)
        return None
def __init__(self, op, command, canOpendap=False, canSelectVar=False,
             canSelectTime=False, canSelectDomain=False, canAggregateTime=False,
             canAlias=False, canMissing=False, commuteWithEnsemble=False):
    """Placeholder constructor — this operator-declaration feature is not
    implemented yet; calling it only logs an error and stores nothing."""
    clogger.error("Not yet developped")
def macro(name, cobj, lobjects=[]):
    """ Define a CliMAF macro from a CliMAF compound object.

    Transform a Climaf object in a macro, replacing all datasets,
    and the objects of lobjects, by a dummy argument.  Register it in
    dict cmacros, if name is not None

    Args:
     name (string) : the name you want to give to the macro; a Python
      function with the same name will be defined
     cobj (CliMAF object, or string) : any CliMAF object, usually
      the result of a series of operators, that you would like to
      repeat using other input datasets; alternatively, you can provide
      the macro formula as a string (when accustomed to the syntax)
     lobjects (list, optional): for expert use- a list of objects,
      which are sub-objects of cobject, and which should become arguments
      of the macro

    Returns:
      a macro; the returned value is usualy not used 'as is' : a
      python function is also defined in module cmacros and in main
      namespace, and you may use it in the same way as a CliMAF
      operator.  All the datasets involved in ``cobj`` become arguments
      of the macro, which allows you to re-do the same computations and
      easily define objects similar to ``cobjs``

    Example::

     >>> # First use and combine CliMAF operators to get some interesting result using some dataset(s)
     >>> january_ta=ds(project='example',simulation='AMIPV6ALB2G',variable='ta',frequency='monthly',period='198001')
     >>> ta_europe=llbox(january_ta,latmin=40,latmax=60,lonmin=-15,lonmax=25)
     >>> ta_ezm=ccdo(ta_europe,operator='zonmean')
     >>> fig_ezm=plot(ta_ezm)
     >>> #
     >>> # Using this result as an example, define a macro named 'eu_cross_section',
     >>> # which arguments will be the datasets involved in this result
     >>> cmacro('eu_cross_section',fig_ezm)
     >>> #
     >>> # You can of course apply a macro to another dataset(s) (even here to a 2D variable)
     >>> pr=ds(project='example',simulation='AMIPV6ALB2G', variable='pr', frequency='monthly', period='198001')
     >>> pr_ezm=eu_cross_section(pr)
     >>> #
     >>> # All macros are registered in dictionary climaf.cmacro.cmacros,
     >>> # which is imported by climaf.api; you can list it by :
     >>> cmacros

    Note : macros are automatically saved in file ~/.climaf.macros, and can be edited

    See also much more explanations in the example at :download:`macro.py <../examples/macro.py>`
    """
    # NOTE(review): mutable default for 'lobjects' — harmless here since it
    # is only iterated, never mutated
    if isinstance(cobj, str):
        s = cobj
        # Next line used for interpreting macros's CRS : makes cdummy()
        # available as 'ARG' in the user's __main__ namespace before eval()
        exec("from climaf.cmacro import cdummy; ARG=cdummy()",
             sys.modules['__main__'].__dict__)
        try:
            cobj = eval(cobj, sys.modules['__main__'].__dict__)
        except:
            # usually case of a CRS which project is not currently defined
            clogger.error(
                "Cannot interpret %s with the projects currently define" % s)
            return None
        #print "string %s was interpreted as %s"%(s,cobj)
    # Should cobj itself be replaced by a dummy argument ? True when it
    # matches (by equality or by CRS) one of the objects in 'lobjects'
    domatch = False
    for o in lobjects:
        domatch = domatch or cobj==o or \
            ( isinstance(cobj,cobject) and cobj.buildcrs() == o.buildcrs())
    if isinstance(cobj, cdataset) or isinstance(cobj, cdummy) or domatch:
        # Datasets (and matched sub-objects) become macro arguments
        return cdummy()
    elif isinstance(cobj, ctree):
        # Rebuild the operator call, then recursively 'macroize' its operands
        rep = ctree(cobj.operator, cobj.script, *cobj.operands,
                    **cobj.parameters)
        rep.operands = map(macro, [None for o in rep.operands], rep.operands)
    elif isinstance(cobj, scriptChild):
        rep = scriptChild(macro(None, cobj.father), cobj.varname)
    elif isinstance(cobj, cpage):
        # Macroize every figure of every line of the page
        rep = cpage([ map(macro, [None for fig in line], line)
                      for line in cobj.fig_lines ], cobj.widths, cobj.heights)
    elif isinstance(cobj, cens):
        # Macroize each ensemble member, keeping its label
        d = dict()
        for k, v in zip(
                cobj.keys(),
                map(macro, [None for o in cobj.values()], cobj.values())):
            d[k] = v
        rep = cens(d)
    elif cobj is None:
        return None
    else:
        clogger.error("Cannot yet handle object :%s", `cobj`)
        rep = None
    if name and rep:
        # Register the macro, then create a same-named Python function in
        # both this module's and the user's ('__main__') namespaces
        cmacros[name] = rep
        doc = "A CliMAF macro, which text is " + `rep`
        defs='def %s(*args) :\n """%s"""\n return instantiate(cmacros["%s"],[ x for x in args])\n'\
            % (name,doc,name)
        exec defs in globals()
        exec "from climaf.cmacro import %s" % name in sys.modules[
            '__main__'].__dict__
        clogger.debug("Macro %s has been declared" % name)
    return rep
def lonlatvert_interpolation(dat1, dat2=None, vertical_levels=None, cdo_horizontal_grid='r1x90',
                             horizontal_regridding=True):
    """
    Interpolates a lon/lat/pres field dat1 via two possible ways:
       - either by providing a target lon/lat/pres field dat2
         => dat1 is regridded both horizontally and vertically on dat2
       - or by providing a list of vertical levels
         => dat1 is regridded horizontally on the cdo_horizontal_grid (default='r1x90'),
            and vertically on the list of vertical levels

    The user can provide the vertical levels (in Pa) like this:
       vertical_levels=[100000,85000,50000,20000,...]
       # or
       vertical_levels='100000,85000,50000,20000'

    Before the computations, the function checks the unit of the vertical axis;
    it is converted to Pa if necessary directly in the netcdf file(s) corresponding to dat1(2).

    >>> dat = ds(project='CMIP5',model='IPSL-CM5A-LR',variable='ua',period='1980-1985', experiment='historical',table='Amon')
    >>> ref = ds(project='ref_pcmdi',variable='ua',product='ERAINT')
    >>> zonmean_dat = zonmean(time_average(dat))
    >>> zonmean_ref = zonmean(time_average(ref))
    >>> dat_interpolated_on_ref = lonlatvert_interpolation(zonmean_dat,zonmean_ref)
    >>> dat_interpolated_on_list_of_levels = lonlatvert_interpolation(zonmean_dat,vertical_levels='100000,85000,50000,20000,10000,5000,2000,1000')
    """
    from climaf.anynetcdf import ncf
    from climaf import cachedir
    # Evaluate dat1 to an actual cached file and open it
    file1 = cfile(dat1)
    clogger.debug('file1 = %s' % file1)
    ncfile1 = ncf(file1)
    # -- First, we check the unit of the vertical dimension of file1
    # Scan variable names for a vertical-axis-like name (last match wins)
    levname1 = None
    for varname in ncfile1.variables:
        if varname.lower() in ['level', 'levels', 'lev', 'levs', 'depth', 'deptht',
                               'olevel'] or 'plev' in varname.lower():
            levname1 = varname
    if not levname1:
        clogger.debug('Name of the vertical axis not found for dat1')
    # NOTE(review): if levname1 is still None, the next line raises — the
    # debug message above does not prevent the lookup; confirm intended
    levunits1 = ncfile1.variables[levname1].units
    if levunits1.lower() in ['hpa', 'millibar', 'mbar', 'hectopascal']:
        # -- Multiplier par 100 (convert the axis to Pa, in the file itself)
        cscript('convert_plev_hPa_to_Pa',
                'ncap2 -As "' + levname1 + '=' + levname1 + '*100" ${in} ' + cachedir +
                '/convert_to_Pa_tmp.nc ; ncatted -O -a units,' + levname1 + ',o,c,Pa ' + cachedir +
                '/convert_to_Pa_tmp.nc ; mv ' + cachedir + '/convert_to_Pa_tmp.nc ${out}')
        dat1 = climaf.operators.convert_plev_hPa_to_Pa(dat1)
        # -> The vertical axis of file1 is now set to Pa
    #
    # -- Second, we check the unit of the vertical dimension of file2
    if dat2:
        file2 = cfile(dat2)
        clogger.debug('file2 = %s' % file2)
        ncfile2 = ncf(file2)
        levname2 = None
        for varname in ncfile2.variables:
            if varname.lower() in ['level', 'levels', 'lev', 'levs', 'depth', 'deptht',
                                   'olevel'] or 'plev' in varname.lower():
                levname2 = varname
        clogger.debug('levname2 = %s' % levname2)
        if not levname2:
            clogger.debug('Name of the vertical axis not found for dat2')
        levunits2 = ncfile2.variables[levname2].units
        clogger.debug('ncfile2 = %s' % ncfile2)
        # Extract the level values, accommodating several netCDF backends
        try:
            levValues2 = ncfile2.variables[levname2].getValue()
        except:
            try:
                levValues2 = ncfile2.variables[levname2].data
            except:
                levValues2 = ncfile2[levname2][0:len(ncfile2[levname2])]
        if levunits2.lower() in ['hpa', 'millibar', 'mbar', 'hectopascal']:
            # -- Multiplier par 100
            cscript('convert_plev_hPa_to_Pa',
                    'ncap2 -As "' + levname2 + '=' + levname2 + '*100" ${in} ' + cachedir +
                    '/convert_to_Pa_tmp.nc ; ncatted -O -a units,' + levname2 + ',o,c,Pa ' + cachedir +
                    '/convert_to_Pa_tmp.nc ; mv ' + cachedir + '/convert_to_Pa_tmp.nc ${out}')
            dat2 = climaf.operators.convert_plev_hPa_to_Pa(dat2)
            # -> The vertical axis of file2 is now set to Pa in the netcdf file
            scale = 100.0
        else:
            scale = 1.0
        #
        # --> We get the values of the vertical levels of dat2 (from the original file, that's why we apply a scale)
        levels = ''
        for lev in levValues2:
            levels = levels + ',' + str(lev * scale)
        #
        # --> We can now interpolate dat1 on dat2 verticaly and horizontally
        if horizontal_regridding:
            regridded_dat1 = ccdo(regrid(dat1, dat2, option='remapdis'), operator='intlevel' + levels)
        else:
            regridded_dat1 = ccdo(dat1, operator='intlevel' + levels)
    else:
        if vertical_levels:
            # Accept either a python list or a comma-separated string
            if isinstance(vertical_levels, list):
                levels = ''
                for lev in vertical_levels:
                    levels = levels + ',' + str(lev)
            else:
                levels = ',' + vertical_levels
            if horizontal_regridding:
                regridded_dat1 = ccdo(regridn(dat1, cdogrid=cdo_horizontal_grid), operator='intlevel' + levels)
            else:
                regridded_dat1 = ccdo(dat1, operator='intlevel' + levels)
        else:
            # NOTE(review): on this error path 'regridded_dat1' is never
            # assigned, so the return below raises NameError — confirm
            clogger.error('--> Provide a list of vertical levels with vertical_levels')
    return regridded_dat1
def derive(project, derivedVar, Operator, *invars, **params):
    """
    Define that 'derivedVar' is a derived variable in 'project', computed by
    applying 'Operator' to input streams which are datasets whose variable
    names take the values in ``*invars`` and the parameter/arguments of
    Operator take the values in ``**params``

    'project' may be the wildcard : '*'

    Example, assuming that operator 'minus' has been defined as ::

    >>> cscript('minus','cdo sub ${in_1} ${in_2} ${out}')

    which means that ``minus`` uses CDO for substracting the two datasets;
    you may define, for a given project 'CMIP5', a new variable e.g.
    for cloud radiative effect at the surface, named 'rscre',
    using the difference of values of all-sky and clear-sky net
    radiation at the surface by::

    >>> derive('CMIP5', 'rscre','minus','rs','rscs')

    You may then use this variable name at any location you
    would use any other variable name

    Note : you may use wildcard '*' for the project

    Another example is rescaling or renaming some variable;
    here, let us define how variable 'ta'
    can be derived from ERAI variable 't' :

    >>> derive('erai', 'ta','rescale', 't', scale=1., offset=0.)

    **However, this is not the most efficient way to do that**.
    See :py:func:`~climaf.classes.calias()`

    Expert use : argument 'derivedVar' may be a dictionary, which keys are
    derived variable names and values are scripts outputs names; example ::

    >>> cscript('vertical_interp', 'vinterp.sh ${in} surface_pressure=${in_2} ${out_l500} ${out_l850} method=${opt}')
    >>> derive('*', {'z500' : 'l500' , 'z850' : 'l850'},'vertical_interp', 'zg', 'ps', opt='log'}
    """
    # Action : register the information in a dedicated dict which keys
    # are single derived variable names, and which will be used at the
    # object evaluation step
    # Also : some consistency checks w.r.t. script definition
    if Operator in scripts:
        # Normalize the scalar form to the dict form { output_name : var }
        if not isinstance(derivedVar, dict):
            derivedVar = dict(out=derivedVar)
        for outname in derivedVar:
            # NOTE(review): 'Operator' is the operator *name* (a key of
            # 'scripts'), so getattr(Operator, "outvarnames", None) looks the
            # attribute up on a string — presumably always None; verify
            # whether scripts[Operator] was intended here
            if (outname != 'out' and (not getattr(Operator, "outvarnames", None) or
                                      outname not in Operator.outvarnames)):
                raise Climaf_Operator_Error(
                    "%s is not a named ouput for operator %s; type help(%s)" %
                    (outname, Operator, Operator))
            s = scripts[Operator]
            # The number of input variables must match the script's inputs
            if s.inputs_number() != len(invars):
                clogger.error(
                    "number of input variables for operator %s is %d, which is inconsistent with "
                    "script declaration : %s" % (s.name, len(invars), s.command))
                return
            # TBD : check parameters number ( need to build
            # its list in cscript.init() )
            if project not in derived_variables:
                derived_variables[project] = dict()
            derived_variables[project][derivedVar[outname]] = (Operator, outname, list(invars), params)
    elif Operator in operators:
        clogger.warning(
            "Cannot yet handle derived variables based on internal operators")
    else:
        clogger.error(
            "second argument (%s) must be a script or operator, already declared" % repr(Operator))
def cdrop(obj, rm=True, force=False):
    """
    Deletes the cached file for a CliMAF object, if it exists

    Args:
     obj (cobject or string) : object to delete, or its string representation (CRS)
     force (bool) : should we delete the object even if it is 'protected'
     rm (bool) : for advanced use only; should we actually delete (rm) the file,
      or just forget it in CliMAF cache index

    Returns:
     None if object does not exists, False if failing to delete, True if OK

    Example ::

    >>> dg=ds(project='example', simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
    >>> f=cfile(dg)
    >>> os.system('ls -al '+f)
    >>> cdrop(dg)
    """
    global crs2filename
    global dropped_crs
    # Derive the CRS index key from the argument
    if isinstance(obj, cobject):
        crs = repr(obj)
        if isinstance(obj, cdataset):
            crs = "select(" + crs + ")"
    elif type(obj) is str:
        crs = obj
    else:
        clogger.error("%s is not a CliMAF object" % repr(obj))
        return
    if crs in crs2filename:
        # BUGFIX: message said "expect if protected" instead of "except"
        clogger.info("Discarding cached value for %s (except if protected)" % crs)
        fil = crs2filename[crs]
        if rm:
            try:
                if force:
                    # Make the file deletable even if it was protected
                    os.system("chmod +w " + fil)
                if not os.access(fil, os.W_OK):
                    clogger.info("Object %s is protected" % crs)
                    return
                path_file = os.path.dirname(fil)
                os.remove(fil)
                crs2filename.pop(crs)
                dropped_crs.append(crs)
                try:
                    # Remove the containing directory if now empty
                    os.rmdir(path_file)
                except OSError as ex:
                    clogger.warning(ex)
                return True
            except:
                clogger.warning(
                    "When trying to remove %s : file does not exist in cache" % crs)
                return False
        else:
            # BUGFIX: with rm=False the docstring promises we "just forget it
            # in CliMAF cache index", but the index entry was never removed
            # (the function silently returned None); forget it and report OK
            crs2filename.pop(crs)
            dropped_crs.append(crs)
            return True
    else:
        clogger.info("%s is not cached" % crs)
        return None
def ceval_script (scriptCall,deep,recurse_list=[]): """ Actually applies a CliMAF-declared script on a script_call object Prepare operands as fiels and build command from operands and parameters list Assumes that scripts are described in dictionary 'scripts' by templates as documented in operators.cscript Returns a CLiMAF cache data filename """ script=operators.scripts[scriptCall.operator] template=Template(script.command) # Evaluate input data dict_invalues=dict() sizes=[] for op in scriptCall.operands : inValue=ceval(op,userflags=scriptCall.flags,format='file',deep=deep, recurse_list=recurse_list) if inValue is None or inValue is "" : raise Climaf_Driver_Error("When evaluating %s : value for %s is None"\ %(scriptCall.script,`op`)) if isinstance(inValue,list) : size=len(inValue) else : size=1 sizes.append(size) dict_invalues[op]=inValue # # Replace input data placeholders with filenames subdict=dict() opscrs="" if 0 in script.inputs : label,multiple,serie=script.inputs[0] op=scriptCall.operands[0] infile=dict_invalues[op] if not all(map(os.path.exists,infile.split(" "))) : raise Climaf_Driver_Error("Internal error : some input file does not exist among %s:"%(infile)) subdict[ label ]=infile #if scriptCall.flags.canSelectVar : subdict["var"]=varOf(op) if isinstance(op,classes.cdataset) and op.alias and scriptCall.flags.canAlias: filevar,scale,offset,units,filenameVar,missing=op.alias #if script=="select" and ((varOf(op) != filevar) or scale != 1.0 or offset != 0.) : if ((varOf(op) != filevar) or scale != 1.0 or offset != 0.) 
: subdict["alias"]="%s,%s,%.4g,%.4g"%(varOf(op),filevar,scale,offset) subdict["var"]=filevar if units : subdict["units"]=units if scriptCall.flags.canMissing and missing : subdict["missing"]=missing if isinstance(op,classes.cens) : if not multiple : raise Climaf_Driver_Error( "Script %s 's input #%s cannot accept ensemble %s"\ %(scriptCall.script,0,`op`)) #subdict["labels"]=r'"'+reduce(lambda x,y : "'"+x+"' '"+y+"'", op.labels)+r'"' subdict["labels"]=reduce(lambda x,y : x+"$"+y, op.labels) per=timePeriod(op) if not per.fx and str(per) != "" and scriptCall.flags.canSelectTime: subdict["period"]=str(per) subdict["period_iso"]=per.iso() if scriptCall.flags.canSelectDomain : subdict["domain"]=domainOf(op) i=0 for op in scriptCall.operands : opscrs += op.crs+" - " infile=dict_invalues[op] if not all(map(os.path.exists,infile.split(" "))) : raise Climaf_Driver_Error("Internal error : some input file does not exist among %s:"%(infile)) i+=1 if ( i> 1 or 1 in script.inputs) : label,multiple,serie=script.inputs[i] subdict[ label ]=infile # Provide the name of the variable in input file if script allows for subdict["var_%d"%i]=varOf(op) if isinstance(op,classes.cdataset) and op.alias : filevar,scale,offset,units,filenameVar,missing =op.alias if (varOf(op) != filevar) or (scale != 1.0) or (offset != 0.) 
: subdict["alias_%d"%i]="%s %s %f %f"%(varOf(op),filevar,scale,offset) subdict["var_%d"%i]=filevar if units : subdict["units_%d"%i]=units if missing : subdict["missing_%d"%i]=missing # Provide period selection if script allows for per=timePeriod(op) if not per.fx and per != "": subdict["period_%d"%i]=str(per) subdict["period_iso_%d"%i]=per.iso() subdict["domain_%d"%i]=domainOf(op) clogger.debug("subdict for operands is "+`subdict`) # substitution is deffered after scriptcall parameters evaluation, which may # redefine e.g period # # Provide one cache filename for each output and instantiates the command accordingly if script.outputFormat is not None : # Compute a filename for each ouptut # Un-named main output main_output_filename=cache.generateUniqueFileName(scriptCall.crs, format=script.outputFormat) subdict["out"]=main_output_filename subdict["out_"+scriptCall.variable]=main_output_filename # Named outputs for output in scriptCall.outputs: subdict["out_"+output]=cache.generateUniqueFileName(scriptCall.crs+"."+output,\ format=script.outputFormat) # Account for script call parameters for p in scriptCall.parameters : #clogger.debug("processing parameter %s=%s"%(p,scriptCall.parameters[p])) subdict[p]=scriptCall.parameters[p] if p=="period" : subdict["period_iso"]=init_period(scriptCall.parameters[p]).iso() subdict["crs"]=opscrs.replace("'","") # # Combine CRS and possibly member_label to provide/complement title if 'title' not in subdict : if 'member_label' in subdict : subdict["title"]=subdict['member_label'] else: subdict["title"]=subdict["crs"] else: if 'member_label' in subdict : subdict["title"]=subdict["title"]+" "+subdict['member_label'] subdict.pop('member_label') # # Substitute all args template=template.safe_substitute(subdict) # # Allowing for some formal parameters to be missing in the actual call: # # Discard remaining substrings looking like : # some_word='"${some_keyword}"' , or: # '"${some_keyword}"' 
template=re.sub(r'(\w*=)?(\'\")?\$\{\w*\}(\"\')?',r"",template) # # Discard remaining substrings looking like : # some_word=${some_keyword} , or # ${some_keyword} template=re.sub(r"(\w*=)?\$\{\w*\}",r"",template) # # Launch script using command, and check termination #command="PATH=$PATH:"+operators.scriptsPath+template+fileVariables #command="echo '\n\nstdout and stderr of script call :\n\t "+template+\ # "\n\n'> scripts.out ; "+ template+ " >> scripts.out 2>&1" tim1=time.time() clogger.info("Launching command:"+template) # command=subprocess.Popen(template, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True) command.wait() # logfile=open('last.out', 'w') logfile.write("\n\nstdout and stderr of script call :\n\t "+template+"\n\n") command_std="" for line in command.stdout: command_std+=line logfile.write(line) logfile.close() if ( command.wait() == 0 ): if script.outputFormat is not None : # Tagging output files with their CliMAF Reference Syntax definition # Un-named main output ok = cache.register(main_output_filename,scriptCall.crs) # Named outputs for output in scriptCall.outputs: ok = ok and cache.register(subdict["out_"+output],\ scriptCall.crs+"."+output) if ok : duration=time.time() - tim1 print("Done in %.1f s with script computation for %s "%\ (duration,`scriptCall`),file=sys.stderr) clogger.debug("Done in %.1f s with script computation for " "%s (command was :%s )"%\ (duration,`scriptCall`,template)) return main_output_filename else : raise Climaf_Driver_Error("Some output missing when executing " ": %s. \n See last.out"%template) else : clogger.debug("script %s has no output"%script.name) return None else: clogger.debug("Full script output:\n"+command_std) comm2=subprocess.Popen(["tail", "-n", "10", "last.out"], stdout=subprocess.PIPE) clogger.error("Last lines of script output:\n"+comm2.stdout.read()) raise Climaf_Driver_Error("Script failure for : %s. More details either in file " "./last.out or by re-runing with clog(\"debug\")" %template)
def derive(project, derivedVar, Operator, *invars, **params) :
    """
    Define that 'derivedVar' is a derived variable in 'project', computed by
    applying 'Operator' to input streams which are datasets whose variable
    names take the values in ``*invars`` and the parameter/arguments of
    Operator take the values in ``**params``

    'project' may be the wildcard : '*'

    Example , assuming that operator 'minus' has been defined as ::

    >>> cscript('minus','cdo sub ${in_1} ${in_2} ${out}')

    which means that ``minus`` uses CDO for substracting the two datasets;
    you may define, for a given project 'CMIP5', a new variable e.g.
    for cloud radiative effect at the surface, named 'rscre',
    using the difference of values of all-sky and clear-sky net
    radiation at the surface by::

    >>> derive('CMIP5', 'rscre','minus','rs','rscs')

    You may then use this variable name at any location you
    would use any other variable name

    Note : you may use wildcard '*' for the project

    Another example is rescaling or renaming some variable;
    here, let us define how variable 'ta'
    can be derived from ERAI variable 't' :

    >>> derive('erai', 'ta','rescale', 't', scale=1., offset=0.)

    **However, this is not the most efficient way to do that**.
    See :py:func:`~climaf.classes.calias()`

    Expert use : argument 'derivedVar' may be a dictionary, which keys are
    derived variable names and values are scripts outputs names; example ::

    >>> cscript('vertical_interp', 'vinterp.sh ${in} surface_pressure=${in_2} ${out_l500} ${out_l850} method=${opt}')
    >>> derive('*', {'z500' : 'l500' , 'z850' : 'l850'},'vertical_interp', 'zg', 'ps', opt='log'}
    """
    # Action : register the information in a dedicated dict which keys
    # are single derived variable names, and which will be used at the
    # object evaluation step
    # Also : some consistency checks w.r.t. script definition
    if Operator in scripts :
        # Normalize the scalar form to the dict form { output_name : var }
        if not isinstance(derivedVar,dict) :
            derivedVar=dict(out=derivedVar)
        for outname in derivedVar :
            if (outname != 'out' and
                (not getattr(Operator,"outvarnames",None)
                 or outname not in Operator.outvarnames )):
                raise Climaf_Operator_Error(
                    "%s is not a named ouput for operator %s; type help(%s)"%\
                    (outname,Operator,Operator))
            s=scripts[Operator]
            # The number of input variables must match the script's inputs
            if s.inputs_number() != len(invars) :
                # BUGFIX : adjacent string literals were concatenated without
                # a separating space, yielding e.g. 'operatorminus is 2 ...'
                clogger.error("number of input variables for operator "
                              "%s is %d, which is inconsistent with "
                              "script declaration : %s"\
                              %(s.name,len(invars),s.command))
                return
            # TBD : check parameters number ( need to build
            # its list in cscript.init() )
            if project not in derived_variables :
                derived_variables[project]=dict()
            derived_variables[project][derivedVar[outname]]=(Operator, outname, list(invars), params)
    elif Operator in operators :
        clogger.warning("Cannot yet handle derived variables based on internal operators")
    else :
        clogger.error("second argument must be a script or operator, already declared")
def macro(name, cobj, lobjects=[]):
    """ Define a CliMAF macro from a CliMAF compound object.

    Transform a Climaf object in a macro, replacing all datasets,
    and the objects of lobjects, by a dummy argument.  Register it in
    dict cmacros, if name is not None

    Args:
     name (string) : the name you want to give to the macro; a Python
      function with the same name will be defined
     cobj (CliMAF object, or string) : any CliMAF object, usually
      the result of a series of operators, that you would like to
      repeat using other input datasets; alternatively, you can provide
      the macro formula as a string (when accustomed to the syntax)
     lobjects (list, optional): for expert use- a list of objects,
      which are sub-objects of cobject, and which should become arguments
      of the macro

    Returns:
      a macro; the returned value is usualy not used 'as is' : a
      python function is also defined in module cmacros and in main
      namespace, and you may use it in the same way as a CliMAF
      operator.  All the datasets involved in ``cobj`` become arguments
      of the macro, which allows you to re-do the same computations and
      easily define objects similar to ``cobjs``

    Example::

     >>> # First use and combine CliMAF operators to get some interesting result using some dataset(s)
     >>> january_ta=ds(project='example',simulation='AMIPV6ALB2G',variable='ta',frequency='monthly',period='198001')
     >>> ta_europe=llbox(january_ta,latmin=40,latmax=60,lonmin=-15,lonmax=25)
     >>> ta_ezm=ccdo(ta_europe,operator='zonmean')
     >>> fig_ezm=plot(ta_ezm)
     >>> #
     >>> # Using this result as an example, define a macro named 'eu_cross_section',
     >>> # which arguments will be the datasets involved in this result
     >>> cmacro('eu_cross_section',fig_ezm)
     >>> #
     >>> # You can of course apply a macro to another dataset(s) (even here to a 2D variable)
     >>> pr=ds(project='example',simulation='AMIPV6ALB2G', variable='pr', frequency='monthly', period='198001')
     >>> pr_ezm=eu_cross_section(pr)
     >>> #
     >>> # All macros are registered in dictionary climaf.cmacro.cmacros,
     >>> # which is imported by climaf.api; you can list it by :
     >>> cmacros

    Note : macros are automatically saved in file ~/.climaf.macros, and can be edited

    See also much more explanations in the example at :download:`macro.py <../examples/macro.py>`
    """
    if isinstance(cobj, str):
        s = cobj
        # Next line used for interpreting macros's CRS : makes cdummy()
        # available as 'ARG' in the user's __main__ namespace before eval()
        exec ("from climaf.cmacro import cdummy; ARG=cdummy()", sys.modules["__main__"].__dict__)
        cobj = eval(cobj, sys.modules["__main__"].__dict__)
        # print "string %s was interpreted as %s"%(s,cobj)
    # Should cobj itself be replaced by a dummy argument ? True when it
    # matches (by equality or by CRS) one of the objects in 'lobjects'
    domatch = False
    for o in lobjects:
        domatch = domatch or cobj == o or (isinstance(cobj, cobject) and cobj.buildcrs() == o.buildcrs())
    if isinstance(cobj, cdataset) or isinstance(cobj, cdummy) or domatch:
        # Datasets (and matched sub-objects) become macro arguments
        return cdummy()
    elif isinstance(cobj, ctree):
        # Rebuild the operator call, then recursively 'macroize' its operands
        rep = ctree(cobj.operator, cobj.script, *cobj.operands, **cobj.parameters)
        rep.operands = map(macro, [None for o in rep.operands], rep.operands)
    elif isinstance(cobj, scriptChild):
        # NOTE(review): recursion goes through 'cmacro' here (and in the cpage
        # branch below) but through 'macro' above — presumably 'cmacro' is an
        # alias of this function; verify it is defined in this module
        rep = scriptChild(cmacro(None, cobj.father), cobj.varname)
    elif isinstance(cobj, cpage):
        rep = cpage(
            cobj.widths,
            cobj.heights,
            [map(cmacro, [None for fig in line], line) for line in cobj.fig_lines],
            cobj.orientation,
        )
    elif cobj is None:
        return None
    else:
        clogger.error("Cannot yet handle object :%s", `cobj`)
        rep = None
    if name and rep:
        # Register the macro, then create a same-named Python function in
        # both this module's and the user's ('__main__') namespaces
        cmacros[name] = rep
        doc = "A CliMAF macro, which text is " + `rep`
        defs = 'def %s(*args) :\n  """%s"""\n  return instantiate(cmacros["%s"],[ x for x in args])\n' % (
            name,
            doc,
            name,
        )
        exec defs in globals()
        exec "from climaf.cmacro import %s" % name in sys.modules["__main__"].__dict__
        clogger.debug("Macro %s has been declared" % name)
    return rep
def selectGenericFiles(urls, return_wildcards=None, merge_periods_on=None, **kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned
    by this function is composed of files which :

    - match the patterns in ``urls`` once these patterns are instantiated by the
      values in kwargs, and
    - contain the ``variable`` provided in kwargs
    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory. However, for remote files,
    filename pattern must include ${varname}, which is instanciated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this
    is for the sake of efficiency (please complain if inadequate)

    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*${PERIOD}*.nc)'])
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the
    argument names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and filenames do
      include the variable name, as this speed up the search
    - ${PERIOD} : use it for indicating the period covered by each file, if this
      is applicable in the file naming; this period can appear in filenames as
      YYYY, YYYYMM, YYYYMMDD, YYYYMMDDHHMM, either once only, or twice with
      separator '-' or '_'
    - wildcards '?' and '*' for matching respectively one and any number of characters
    """
    def store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards,
                                    merge_periods_on=None, fperiod=None,
                                    periods=None, periods_dict=None):
        """For file F, record in dict WILDCARDS the values taken by those facets
        which are a wildcard in KWARGS, as extracted with FACETS_REGEXP; also
        record FPERIOD in list PERIODS and in PERIODS_DICT (keyed by the value
        of facet MERGE_PERIODS_ON, or by None)."""
        if fperiod is not None and periods is not None:
            clogger.debug('Adding period %s' % fperiod)
            periods.append(fperiod)
        #
        for kw in kwargs:
            it = re.finditer(facets_regexp, f)
            for oc in it:
                try:
                    facet_value = oc.group(kw)
                except IndexError:
                    # No group named kw in the regexp : facet not in pattern
                    continue
                if type(kwargs[kw]) is str and ("*" in kwargs[kw] or "?" in kwargs[kw]):
                    if facet_value is not None:
                        if kw not in wildcards:
                            wildcards[kw] = set()
                        wildcards[kw].add(facet_value)
                        clogger.debug("Discover %s=%s for file=%s" % (kw, facet_value, f))
                    else:
                        clogger.debug("Logic issue for kw=%s and file=%s" % (kw, f))
                    #
                    if fperiod is not None and periods is not None:
                        if merge_periods_on is None:
                            key = None
                        elif kw == merge_periods_on:
                            key = facet_value
                        else:
                            continue
                        if key not in periods_dict:
                            periods_dict[key] = set()
                        periods_dict[key].add(fperiod)

    def _keep_files_with_date(lfiles):
        """Keep only files whose name matches one of the date regexps
        (glob '*' alone is too inclusive). Factors out a loop that was
        duplicated; also fixes a 'continue' that should have been 'break'
        (a file matching several date keywords was appended several times)."""
        kept = []
        for f in lfiles:
            for k in date_keywords:
                if re.search(date_regexp_patt[k], f):
                    kept.append(f)
                    break
        return kept
    #
    rep = []
    periods = None        # list of all periods found, when period='*'
    periods_dict = dict() # periods grouped by the value of facet merge_periods_on
    #
    period = kwargs['period']
    if period == "*":
        periods = []  # triggers collecting the list of all periods
    elif type(period) is str:
        period = init_period(period)
    #
    variable = kwargs['variable']
    altvar = kwargs.get('filenameVar', variable)
    #
    # dicts of date patterns, for globbing and for regexp
    #
    digit = "[0-9]"
    date_glob_patt = {"${PERIOD}": "*"}
    # an ordered list of dates keywords
    date_keywords = sorted(date_glob_patt.keys(), reverse=True)
    #
    annee = "%s{4}" % digit
    mois = "(01|02|03|04|05|06|07|08|09|10|11|12)"
    jour = "([0-3][0-9])"
    heure = "(00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23)"
    minutes = "[0-5][0-9]"
    date = "%s(%s(%s(%s(%s)?)?)?)?" % (annee, mois, jour, heure, minutes)
    rperiod = "(?P<period>(?P<start>%s)([_-](?P<end>%s))?)" % (date, date)
    date_regexp_patt = {"${PERIOD}": rperiod}
    # an ordered list of dates regexp keywords
    date_regexp_keywords = sorted(date_regexp_patt.keys(), reverse=True)
    #
    for l in urls:
        # Instantiate keywords in pattern with attributes values
        remote_prefix = ""
        if re.findall(".*:.*", l):
            remote_prefix = ':'.join(l.split(":")[0:-1]) + ':'
        basename = l.split(":")[-1]  # This discards the remote_prefix if any
        basename = basename.replace("//", "/")
        my_template = Template(basename)
        template = my_template.safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in date_keywords:
            temp2 = temp2.replace(k, date_glob_patt[k])
        #
        # Do globbing with plain varname
        if remote_prefix:
            lfiles = sorted(glob_remote_data(remote_prefix, temp2))
            clogger.debug("Remote globbing %d files for varname on %s : " %
                          (len(lfiles), remote_prefix + temp2))
        else:  # local data
            lfiles = sorted(glob.glob(temp2))
            clogger.debug("Before regexp filtering : Globbing %d files for varname on %s : " %
                          (len(lfiles), temp2))
            # Must filter with regexp, because * with glob is too inclusive
            lfiles = _keep_files_with_date(lfiles)
            clogger.debug("Globbing %d files for varname on %s : " % (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles) == 0 and "filenameVar" in kwargs and kwargs['filenameVar']:
            # Change value of facet 'variable'
            kwargs['variable'] = kwargs['filenameVar']
            template = my_template.safe_substitute(**kwargs)
            temp2 = template
            for k in date_keywords:
                temp2 = temp2.replace(k, date_glob_patt[k])
            #
            # Do globbing with fileVarname
            if remote_prefix:
                lfiles = sorted(glob_remote_data(remote_prefix, temp2))
                clogger.debug("Remote globbing %d files for filenamevar on %s: " %
                              (len(lfiles), remote_prefix + temp2))
            else:  # local data
                lfiles = sorted(glob.glob(temp2))
                # Must filter with regexp, because * with glob is too inclusive
                lfiles = _keep_files_with_date(lfiles)
                clogger.debug("Globbing %d files for filenamevar on %s: " % (len(lfiles), temp2))
        #
        # For discovering values for those facets which are a wildcard,
        # construct a regexp with a group name for all facets (but period)
        alt_basename = basename.replace("?", ".").replace("*", ".*")
        alt_kwargs = kwargs.copy()
        for kw in kwargs:
            if type(kwargs[kw]) is str:
                # This excludes period attribute, which has a type
                alt_kwargs[kw] = kwargs[kw].replace("?", ".").replace("*", ".*")
                alt_basename = alt_basename.replace(r"${%s}" % kw, r"(?P<%s>%s)" % (kw, alt_kwargs[kw]), 1)
        facets_regexp = Template(alt_basename).safe_substitute(**alt_kwargs)
        for k in date_regexp_keywords:
            # First occurrence of the date keyword gets the named-group regexp,
            # any further occurrence just matches anything
            facets_regexp = facets_regexp.replace(k, date_regexp_patt[k], 1)
            facets_regexp = facets_regexp.replace(k, ".*")
        wildcards = dict()
        #
        # Construct regexp for extracting dates from filename
        date_regexp = None
        template_toreg = template.replace("*", ".*").replace("?", r".").replace("+", r"\+")
        for key in date_regexp_keywords:
            start = template_toreg.find(key)
            if start >= 0:
                date_regexp = template_toreg.replace(key, date_regexp_patt[key], 1)
                hasEnd = False
                start = date_regexp.find(key)
                if start >= 0:
                    hasEnd = True
                    date_regexp = date_regexp.replace(key, date_regexp_patt[key], 1)
                break
        #
        for f in lfiles:
            #
            # Extract file time period
            fperiod = None
            if date_regexp:
                if "P<period>" in date_regexp:
                    tperiod = re.sub(date_regexp, r'\g<period>', f)
                    if tperiod == f:
                        raise classes.Climaf_Error("Cannot find a period in %s with regexp %s" % (f, date_regexp))
                    fperiod = init_period(tperiod)
                else:
                    # NOTE: dead with current patterns (rperiod always has P<period>)
                    date_regexp0 = date_regexp
                    start = re.sub(date_regexp0, r'\1', f)
                    if start == f:
                        # Fixed : message referenced undefined name 'regexp0'
                        raise Climaf_Data_Error("Start period not found in %s using regexp %s" % (f, date_regexp0))
                    if hasEnd:
                        end = re.sub(date_regexp0, r'\2', f)
                        fperiod = init_period("%s-%s" % (start, end))
                    else:
                        fperiod = init_period(start)
            else:
                # No date keyword in the url : only fixed fields can be kept
                # without inspecting file content for time filtering
                if ('frequency' in kwargs and
                    ((kwargs['frequency'] == "fx") or kwargs['frequency'] == "seasonnal" or
                     kwargs['frequency'] == "annual_cycle")):
                    # local data
                    if not remote_prefix and \
                       ((basename.find("${variable}") >= 0) or variable == '*' or
                            fileHasVar(f, variable) or (variable != altvar and fileHasVar(f, altvar))):
                        clogger.debug("adding fixed field :" + f)
                        store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on)
                        rep.append(f)
                    # remote data
                    elif remote_prefix:
                        if (basename.find("${variable}") >= 0) or variable == '*' or \
                           (variable != altvar and (f.find(altvar) >= 0)):
                            clogger.debug("adding fixed field :" + remote_prefix + f)
                            store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on)
                            rep.append(remote_prefix + f)
                        else:
                            # Fixed : '%' bound to the second literal (which has
                            # no placeholder) and raised TypeError at raise-time
                            raise classes.Climaf_Error(
                                ("For remote files, filename pattern (%s) should include ${varname} "
                                 "(which is instanciated by variable name or filenameVar)") % f)
                else:
                    clogger.info("Cannot yet filter files re. time using only file content.")
                    store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on)
                    rep.append(f)
            #
            # If file period matches requested period, check similarly for variable
            if (fperiod and (periods is not None or period.intersects(fperiod))) or not date_regexp:
                clogger.debug('Period is OK - Considering variable filtering on %s and %s for %s' %
                              (variable, altvar, f))
                # Filter against variable
                if l.find("${variable}") >= 0:
                    clogger.debug('appending %s based on variable in filename' % f)
                    store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on,
                                                fperiod, periods, periods_dict)
                    rep.append(remote_prefix + f)
                    continue
                if f not in rep:
                    # local data
                    if not remote_prefix and \
                       (variable == '*' or "," in variable or fileHasVar(f, variable) or
                            (altvar != variable and fileHasVar(f, altvar))):
                        # Should check time period in the file if not date_regexp
                        clogger.debug('appending %s based on multi-var or var exists in file ' % f)
                        store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on,
                                                    fperiod, periods, periods_dict)
                        rep.append(f)
                        continue
                    # remote data
                    elif remote_prefix:
                        if variable == '*' or "," in variable or \
                           (variable != altvar and (f.find(altvar) >= 0)):
                            # Should check time period in the file if not date_regexp
                            clogger.debug('appending %s based on multi-var or altvar ' % (remote_prefix + f))
                            store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on,
                                                        fperiod, periods, periods_dict)
                            rep.append(remote_prefix + f)
                            continue
                        else:
                            mess = "For remote files, filename pattern (%s) should include" % (remote_prefix + f)
                            mess += " ${varname} (which is instanciated by variable name or filenameVar)"
                            raise classes.Climaf_Error(mess)
            else:
                if not fperiod:
                    clogger.debug('not appending %s because period is None ' % f)
                elif not period.intersects(fperiod):
                    clogger.debug('not appending %s because period doesn t intersect %s' % (f, period))
                else:
                    # Fixed : format string had two %s for a single argument
                    # (raised TypeError whenever this branch was reached)
                    clogger.debug('not appending %s for some other reason' % f)
        #
        # Break on first url with any matching data
        if len(rep) > 0:
            clogger.debug('url %s does match for ' % l + repr(kwargs))
            break
    #
    # For wildcard facets, discover facet values + checks
    for facet in wildcards:
        s = wildcards[facet]
        if return_wildcards is not None:
            if facet == "period":
                for val in periods_dict:
                    periods_dict[val] = sort_periods_list(list(periods_dict[val]))
                clogger.info("Attribute period='*' has values %s" % (periods_dict))
                return_wildcards["period"] = periods_dict
            else:
                if len(s) == 1:
                    s = s.pop()
                    clogger.info("Attribute %s='%s' has matching value '%s'" % (facet, kwargs[facet], s))
                    return_wildcards[facet] = s
                else:
                    # Fixed : this branch used to rebind 'rep' (the file list
                    # returned below) to the sorted facet values
                    values = sorted(s)
                    return_wildcards[facet] = values
                    message = "Attribute %s='%s' has multiple values : %s" % (facet, kwargs[facet], list(s))
                    if return_wildcards:
                        clogger.info(message)
                    else:
                        clogger.error(message)
        else:
            clogger.debug("return_wildcards is None")
    return rep
def __init__(self,name, command, format="nc", canOpendap=False,
             commuteWithTimeConcatenation=False, commuteWithSpaceConcatenation=False, **kwargs):
    """
    Declare a script or binary as a 'CliMAF operator', and define a Python
    function with the same name

    Args:
        name (str): name for the CliMAF operator.
        command (str): script calling sequence, according to the syntax described below.
        format (str): script outputs format -- either 'nc' or 'png' or 'None'; defaults to 'nc'
        canOpendap (bool, optional): is the script able to use OpenDAP URIs ? default to False
        commuteWithTimeConcatenation (bool, optional): can the operation commute with
            concatenation of time periods ? set it to true, if the operator can be applied
            on time chunks separately, in order to allow for incremental computation /
            time chunking; defaults to False
        commuteWithSpaceConcatenation (bool, optional): can the operation commute with
            concatenation of space domains ? defaults to False (see commuteWithTimeConcatenation)
        **kwargs : possible keyword arguments, with keys matching '<outname>_var', for
            providing a format string allowing to compute the variable name for output
            'outname' (see below).

    Returns:
        None

    The script calling sequence pattern string (arg 'command') indicates how to
    build the system call which actually launches the script, with a match
    between python objects and formal arguments;

    For introducing the syntax, please consider this example, with the following commands::

    >>> cscript('mycdo','cdo ${operator} ${in} ${out}')
    >>> # define some dataset
    >>> tas_ds = ds(project='example', simulation='AMIPV6', variable='tas', period='1980-1981')
    >>> # Apply operator 'mycdo' to dataset 'tas_ds', choosing a given 'operator' argument
    >>> tas_avg = mycdo(tas_ds,operator='timavg')

    CliMAF will later on launch this call behind the curtain::

    $ cdo tim_avg /home/my/tmp/climaf_cache/8a/5.nc /home/my/tmp/climaf_cache/4e/4.nc

    where :

    - the last filename is generated by CliMAF from the formal expression describing 'tas_avg'
    - the first filename provide a file generated by CliMAF which includes the required
      data for tas_ds

    There are a number of examples in module
    :download:`standard_operators <../climaf/standard_operators.py>`.

    **Detailed syntax**:

    - formal arguments appear as : ``${argument}`` (in the example : ``${in}``,
      ``${out}``, ``${operator}`` )

    - except for reserved keywords, arguments in the pattern will be replaced by the
      values for corresponding keywords used when invoking the diagnostic operator:

      - in the example above : argument ``operator`` is replaced by value ``timavg``,
        which is a keyword known to the external binary called, CDO

    - reserved argument keywords are :

      - **in, in_<digit>, ins, ins_<digit>, mmin** : they will be replaced by CliMAF
        managed filenames for input data, as deduced from dataset description or
        upstream computation; these filenames can actually be remote URLs (if the
        script can use OpenDAP, see args), local 'raw' data files, or CliMAF cache
        filenames

        - **in** stands for the URL of the first dataset invoked in the operator call
        - **in_<digit>** stands for the next ones, in the same order
        - **ins** and **ins_<digit>** stand for the case where the script can select
          input from multiple input files or URLs (e.g. when the whole period to
          process spans over multiple files); in that case, a single string
          (surrounded with double quotes) will carry multiple URLs
        - **mmin** stands for the case where the script accepts an ensemble of
          datasets (only for first input stream yet). CliMAF will replace the keyword
          by a string composed of the corresponding input filenames (not surrounded
          by quotes - please add them yourself in declaration); see also ``labels`` below

      - **var, var_<digit>** : when a script can select a variable in a multi-variable
        input stream, this is declared by adding this keyword in the calling sequence;
        CliMAF will replace it by the actual variable name to process; 'var' stands
        for first input stream, 'var_<digit>' for the next ones;

        - in the example above, we assume that external binary CDO is not tasked with
          selecting the variable, and that CliMAF must feed CDO with a datafile where
          it has already performed the selection

      - **period, period_<digit>** : when a script can select a time period in the
        content of a file or stream, it should declare it by putting this keyword in
        the pattern, which will be replaced at call time by the period written as
        <date1>-<date2>, where date is formated as YYYYMMDD ;

        - time intervals must be interpreted as [date1, date2[
        - 'period' stands for the first input_stream,
        - 'period_<n>' for the next ones, in the order of actual call;
        - in the example above, this keyword is not used, which means that CliMAF
          has to select the period upstream of feeding CDO with the data

      - **period_iso, period_iso_<digit>** : as for **period** above, except that
        the date formating fits CDO conventions :

        - date format is ISO : YYYY-MM-DDTHH:MM:SS
        - interval is [date1,date2_iso], where date2_iso is 1 minute before date2
        - separator between dates is : ,

      - **domain, domain_<digit>** : when a script can select a domain in the input
        grid, this is declared by adding this keyword in the calling sequence; CliMAF
        will replace it by the domain definition if needed, as
        'latmin,latmax,lonmin,lonmax' ; 'domain' stands for first input stream,
        'domain_<digit>' for the next ones :

        - in the example above, we assume that external binary CDO is not tasked with
          selecting the domain, and that CliMAF must feed CDO with a datafile where
          it has already performed the selection

      - **out, out_<word>** : CliMAF provide file names for output files (if there
        is no such field, the script will have only 'side effects', e.g. launch a
        viewer). Main output file must be created by the script with the name
        provided at the location of argument ${out}. Using arguments like
        'out_<word>' tells CliMAF that the script provide some secondary output,
        which will be symbolically known in CliMAF syntax as an attribute of the
        main object; by default, the variable name of each output equals the name
        of the output (except for the main output, which variable name is supposed
        to be the same as for the first input); for other cases, see argument
        \*\*kwargs to provide a format string, used to derive the variable name
        from first input variable name as in e.g. : ``output2_var='std_dev(%s)'``
        for the output labelled output2 (i.e. declared as '${out_output2}')

        - in the example above, we just apply the convention used by CDO, which
          expects that you provide an output filename as last argument on the
          command line. See example mean_and_sdev in doc for advanced usage.

      - **crs** : will be replaced by the CliMAF Reference Syntax expression
        describing the first input stream; can be useful for plot title or legend

      - **alias** : means that the script can make an on the fly re-scaling and
        renaming of a variable. Will be replaced by a string which pattern is :
        'new_varname,file_varname,scale,offset'. The script should then transform
        on reading as new_varname = file_varname * scale + offset

      - **units, units_<digit>** : means that the script can set the units
        on-the-fly while reading one of the input streams

      - **missing** : means that the script can make an on-the-fly transformation
        of a given constant to missing values

      - **labels** : for script accepting ensembles, CliMAF will replace this
        keyword by a string bearing the labels associated with the ensemble, with
        delimiter $ as e.g. in: "CNRM-CM5 is fine$IPSL-CM5-LR is not bad$CCSM-29 is ..."
    """
    # Check that script name do not clash with an existing symbol
    if name in sys.modules['__main__'].__dict__ and name not in scripts :
        clogger.error("trying to define %s as an operator, "
                      "while it exists as smthing else"%name)
        return None
    if name in scripts :
        clogger.warning("Redefining CliMAF script %s"%name)
    #
    # Check now that script is executable (resolve it through 'which')
    scriptcommand=command.split(' ')[0].replace("(","")
    ex=subprocess.Popen(['which',scriptcommand], stdout=subprocess.PIPE)
    if ex.wait() != 0 :
        # NOTE(review): no 'raise' here, unlike checks further below; if
        # Climaf_Operator_Error does not raise in its constructor, this check
        # only constructs the error object and execution continues - confirm intent
        Climaf_Operator_Error("defining %s : command %s is not "
                              "executable"%(name,scriptcommand))
    # Python 2 : stdout.read() returns a str (bytes under Python 3)
    executable=ex.stdout.read().replace('\n','')
    #
    # Analyze inputs field keywords and populate dict
    # attribute 'inputs' with some properties
    # Matches ${in}, ${in_2}, ${ins}, ${ins_3}, ${mmin}, ... and captures
    # the multiplicity ('mm'), series ('s') and rank (digits) markers
    self.inputs=dict()
    commuteWithEnsemble=True
    it=re.finditer(
        r"\${(?P<keyw>(?P<mult>mm)?in(?P<serie>s)?(_(?P<n>([\d]+)))?)}",
        command)
    for oc in it :
        if (oc.group("n") is not None) : rank=int(oc.group("n"))
        else : rank=0
        if rank in self.inputs :
            # NOTE(review): no 'raise' here either - see note above
            Climaf_Operator_Error(
                "When defining %s : duplicate declaration for input #%d"%\
                (name,rank))
        serie=(oc.group("serie") is not None)
        multiple=(oc.group("mult") is not None)
        if multiple :
            if rank != 0 :
                raise Climaf_Operator_Error(
                    "Only first operand may accept members")
            if serie :
                # NOTE(review): adjacent string literals lack a separating
                # space, so the message reads "...acceptmembers..." at runtime
                raise Climaf_Operator_Error(
                    "Operand %s cannot both accept"
                    "members and files set"%oc.group("keyw"))
            commuteWithEnsemble=False
        self.inputs[rank]=(oc.group("keyw"),multiple,serie)
    if len(self.inputs)==0 :
        # NOTE(review): no 'raise' here either - see note above
        Climaf_Operator_Error(
            "When defining %s : command %s must include at least one of "
            "${in} ${ins} ${mmin} or ${in_..} ... for specifying how CliMAF"
            " will provide the input filename(s)"% (name,command))
    #print self.inputs
    # Input ranks must form a contiguous sequence (0 or 1, then 2, 3, ...)
    for i in range(len(self.inputs)) :
        if i+1 not in self.inputs and not ( i == 0 and 0 in self.inputs) :
            # NOTE(review): no 'raise' here either - see note above
            Climaf_Operator_Error(
                "When defining %s : error in input sequence for rank %d"%\
                (name,i+1))
    #
    # Check if command includes an argument allowing for
    # providing an output filename; if not, the operator has only side
    # effects and its output format is cancelled
    if command.find("${out") < 0 : format=None
    #
    # Search in call arguments for keywords matching "<output_name>_var"
    # which may provide format string for 'computing' outputs variable
    # name from input variable name
    outvarnames=dict() ; pattern=r"^(.*)_var$"
    for p in kwargs :
        if re.match(pattern,p):
            outvarnames[re.findall(pattern,p)[0]]=kwargs[p]
    #clogger.debug("outvarnames = "+`outvarnames`)
    #
    # Analyze outputs names , associated variable names
    # (or format strings), and store it in attribute dict 'outputs'
    self.outputs=dict()
    it=re.finditer(r"\${out(_(?P<outname>[\w-]*))?}",command)
    for occ in it :
        outname=occ.group("outname")
        if outname is not None :
            if (outname in outvarnames) :
                self.outputs[outname]=outvarnames[outname]
            else :
                self.outputs[outname]=outname
        else:
            # Main (unnamed) output : variable name is the input's one
            self.outputs[None]="%s"
    #clogger.debug("outputs = "+`self.outputs`)
    #
    # Derive capability flags from the presence of reserved keywords.
    # NOTE(review): tests use 'find(...) > 0', not '>= 0' - a keyword at
    # column 0 would be missed; in practice 'command' starts with the
    # executable name, so position 0 should not occur - confirm
    canSelectVar= (command.find("${var}") > 0 )
    canAggregateTime=(command.find("${ins}") > 0 or command.find("${ins_1}") > 0)
    canAlias= (command.find("${alias}") > 0 )
    canMissing= (command.find("${missing}") > 0 )
    canSelectTime=False
    if command.find("${period}") > 0 or command.find("${period_1}") > 0 :
        canSelectTime=True
    if command.find("${period_iso}") > 0 or command.find("${period_iso_1}") > 0 :
        canSelectTime=True
    canSelectDomain=(command.find("${domain}") > 0 or command.find("${domain_1}") > 0)
    #
    self.name=name
    self.command=command
    self.flags=scriptFlags(canOpendap, canSelectVar, canSelectTime, \
        canSelectDomain, canAggregateTime, canAlias, canMissing,\
        commuteWithEnsemble,\
        commuteWithTimeConcatenation, commuteWithSpaceConcatenation )
    self.outputFormat=format
    # Register the operator in the global scripts dict
    scripts[name]=self
    # Init doc string for the operator
    doc="CliMAF wrapper for command : %s"%self.command
    # try to get a better doc string from colocated doc/ directory
    docfilename=os.path.dirname(__file__)+"/../doc/scripts/"+name+".rst"
    #print "docfilen= "+docfilename
    try:
        docfile=open(docfilename)
        doc=docfile.read()
        docfile.close()
    except:
        # Missing doc file is not an error : keep the default doc string
        pass
    #
    # creates a function named as requested, which will invoke
    # capply with that name and same arguments (Python 2 'exec' statement)
    defs='def %s(*args,**dic) :\n """%s"""\n return driver.capply("%s",*args,**dic)\n'\
        % (name,doc,name)
    exec defs in globals()
    #
    # Also inject the function in the main namespace for interactive use
    exec "from climaf.operators import %s"%name in \
        sys.modules['__main__'].__dict__
    clogger.debug("CliMAF script %s has been declared"%name)