def hasMatchingObject(cobject,ds_func) :
    """
    Search the cache index for an object which is comparable to COBJECT

    Each CRS recorded in dict crs2filename is evaluated in the namespace of
    module ``__main__`` and compared with COBJECT by compare_trees, which
    receives DS_FUNC for pairing the leaves/datasets of both objects. The
    first entry (in crs2filename order) for which compare_trees returns a
    true value, and which file still exists, is selected.

    Can be applied for finding same object with included or including
    time-period

    Returns:
      a pair : the object evaluated from the matching CRS, and the value
      returned by compare_trees for it; (None, None) if there is no match

    Side effect : index entries which match but which file is missing are
    removed from crs2filename
    """
    # The index is not read from file here : this is done at startup

    def op_squeezes_time(operator):
        import operators
        return not operators.scripts[operator].flags.commuteWithTimeConcatenation

    # Iterate on a copy, because stale entries are removed on the fly
    for crs in crs2filename.copy() :
        try:
            # NOTE(review): eval is applied to CRS strings coming from the
            # cache index - they must be trusted
            co=eval(crs, sys.modules['__main__'].__dict__)
            altperiod=compare_trees(co,cobject, ds_func,op_squeezes_time)
            if altperiod :
                if os.path.exists(crs2filename[crs]) :
                    return co,altperiod
                else :
                    clogger.debug("Removing %s from cache index, because file is missing",crs)
                    crs2filename.pop(crs)
        except Exception :
            # Usually the case of a CRS which project is not currently
            # defined : skip it. KeyboardInterrupt and SystemExit are no
            # longer swallowed here
            pass
    return None,None
def register(filename,crs):
    """
    Adds in FILE a metadata named CRS_def and with value CRS.
    Records this FILE in dict crs2filename

    Only files which name ends with '.nc' (stamped using ncatted) or '.png'
    (stamped using convert) are handled.

    Returns:
      True if the file was stamped and recorded; None otherwise, i.e. when
      the file does not exist after waiting, when its type is not handled,
      or when the stamping command fails (an error is logged in each case)
    """
    # First read index from file if it is yet empty
    if len(crs2filename.keys()) == 0 : cload()
    # It appears that we have to allow the file system some time for updating its inode tables
    waited=0
    while waited < 10 and not os.path.exists(filename) :
        time.sleep(0.5)
        waited += 1
    time.sleep(0.5)
    if not os.path.exists(filename) :
        clogger.error("file %s does not exist (for crs %s)"%(filename,crs))
        return None
    if filename.endswith(".nc") :
        command="ncatted -h -a CRS_def,global,o,c,\"%s\" %s"%(crs,filename)
    elif filename.endswith(".png") :
        command="convert -set \"CRS_def\" \"%s\" %s %s.png && mv -f %s.png %s"%\
            (crs,filename,filename,filename,filename)
    else :
        # Formerly, 'command' was left undefined in that case, which
        # raised a NameError a few lines below
        clogger.error("cannot stamp file %s : file type is not handled"%filename)
        return None
    clogger.debug("trying stamping by %s"%command)
    # NOTE(review): crs and filename are interpolated in a shell command line
    if os.system(command) == 0 :
        crs2filename[crs]=filename
        clogger.info("%s registered as %s"%(crs,filename))
        return True
    clogger.critical("cannot stamp by %s"%command)
    return None
def set_variable(obj, varname, format) :
    """
    Rename to VARNAME the variable of OBJ, which FORMAT may be 'file' or
    'MaskedArray', and set its long_name attribute to the value provided
    by CFlongname(VARNAME)

    For FORMAT 'file', OBJ is a filename; it is changed in place, using
    ncrename then ncatted, only if its current variable name (as told by
    varOfFile) is not VARNAME yet.

    Returns:
      True for a file which is OK; None if OBJ is None, if a command fails,
      or for other formats (a message is logged)
    """
    if obj is None :
        return None
    long_name=CFlongname(varname)
    if format == 'MaskedArray' :
        clogger.warning('TBD - Cannot yet set the varname for MaskedArray')
        return None
    if format != 'file' :
        clogger.error('Cannot handle format %s'%format)
        return None
    #
    current=varOfFile(obj)
    if current != varname :
        rename="ncrename -v %s,%s %s >/dev/null 2>&1"%(current,varname,obj)
        status=os.system(rename)
        if status != 0 :
            clogger.error("Issue with changing varname to %s in %s"%(varname,obj))
            return None
        clogger.debug("Varname changed to %s in %s"%(varname,obj))
        # The long_name is updated only when the variable got renamed
        stamp="ncatted -a long_name,%s,o,c,%s %s"%(varname,long_name,obj)
        status=os.system(stamp)
        if status != 0 :
            clogger.error("Issue with changing long_name for var %s in %s"%
                          (varname,obj))
            return None
    return True
def capply(climaf_operator, *operands, **parameters):
    """
    Builds the object representing applying a CliMAF operator (script,
    function or macro) to OPERANDS, with keyword PARAMETERS

    CLIMAF_OPERATOR is looked for, in that order, among operators.scripts,
    cmacro.cmacros and operators.operators. For a script which outputFormat
    is None, the object is evaluated right away (using ceval).

    Returns:
      the object built, or None (after logging an error) if CLIMAF_OPERATOR
      is not known

    Raises:
      Climaf_Driver_Error if there is no operand, if first operand is None,
      or if a macro is called with keyword arguments
    """
    res=None
    # 'operands' is always a tuple (never None) : the case of an empty call
    # must be tested explicitly, otherwise operands[0] raises an IndexError
    if not operands or operands[0] is None :
        raise Climaf_Driver_Error("Operands is None")
    opds=[ str(operand) for operand in operands ]
    if climaf_operator in operators.scripts :
        res=capply_script(climaf_operator, *operands, **parameters)
        # Evaluate object right now if there is no output to manage
        op=operators.scripts[climaf_operator]
        if op.outputFormat is None :
            ceval(res,userflags=copy.copy(op.flags))
    elif climaf_operator in cmacro.cmacros :
        if len(parameters) > 0 :
            raise Climaf_Driver_Error("Macros cannot be called with keyword args")
        clogger.debug("applying macro %s to"%climaf_operator + repr(opds) )
        res=cmacro.instantiate(cmacro.cmacros[climaf_operator],*operands)
    elif climaf_operator in operators.operators :
        clogger.debug("applying operator %s to"%climaf_operator + repr(opds) + repr(parameters))
        res=capply_operator(climaf_operator,*operands, **parameters)
    else:
        clogger.error("%s is not a known operator nor script"%climaf_operator)
    return res
def register(filename,crs):
    """
    Adds in FILE a metadata named 'CRS_def' and with value CRS, and a
    metadata 'CliMAF' with CliMAF version and ref URL.
    Records this FILE in dict crs2filename

    Only files which name ends with '.nc' (stamped using ncatted) or '.png'
    (stamped using convert) are handled.

    Returns:
      True if the file was stamped and recorded; None otherwise, i.e. when
      the file does not exist after waiting, when its type is not handled,
      or when the stamping command fails (an error is logged in each case)
    """
    # The index is not read from file here : this is done at startup
    #
    # It appears that we have to let some time to the file system for updating its inode tables
    waited=0
    while waited < 20 and not os.path.exists(filename) :
        time.sleep(0.1)
        waited += 1
    if not os.path.exists(filename) :
        clogger.error("file %s does not exist (for crs %s)"%(filename,crs))
        return None
    if filename.endswith(".nc") :
        command="ncatted -h -a CRS_def,global,o,c,\"%s\" -a CliMAF,global,o,c,\"CLImate Model Assessment Framework version %s (http://climaf.rtfd.org)\" %s"%\
            (crs,version,filename)
    elif filename.endswith(".png") :
        command="convert -set \"CRS_def\" \"%s\" -set \"CliMAF\" \"CLImate Model Assessment Framework version %s (http://climaf.rtfd.org)\" %s %s.png && mv -f %s.png %s"%\
            (crs,version,filename,filename,filename,filename)
    else :
        # Formerly, 'command' was left undefined in that case, which
        # raised a NameError a few lines below
        clogger.error("cannot stamp file %s : file type is not handled"%filename)
        return None
    clogger.debug("trying stamping by %s"%command)
    # NOTE(review): crs and filename are interpolated in a shell command line
    if os.system(command) == 0 :
        crs2filename[crs]=filename
        clogger.info("%s registered as %s"%(crs,filename))
        return True
    clogger.critical("cannot stamp by %s"%command)
    return None
def crewrite(crs,alsoAtTop=True):
    """
    Return the crs expression with sub-trees replaced by macro equivalent
    when applicable

    Search order is : from CRS tree root try all macros, then do the same
    for first subtree, and recursively in depth, and then go to second
    subtree

    Args:
      crs (str) : the expression to rewrite; it is evaluated in the
        namespace of module ``__main__``
      alsoAtTop (logical, optional) : if False, macros are not tried at
        the top level of the expression

    Returns:
      the rewritten expression; CRS itself if it cannot be evaluated or if
      it does not evaluate to a ctree, a scriptChild or a cpage
    """
    # Next line used for interpreting macros's CRS
    exec("ARG=climaf.cmacro.cdummy()", sys.modules['__main__'].__dict__)
    #
    try :
        # NOTE(review): eval is applied to the CRS - it must be trusted
        co=eval(crs, sys.modules['__main__'].__dict__)
    except Exception :
        clogger.debug("Issue when rewriting %s"%crs)
        return(crs)
    if not isinstance(co,(ctree,scriptChild,cpage)) :
        return(crs)
    if alsoAtTop :
        for m in cmacros :
            clogger.debug("looking at macro : "+m+"="+repr(cmacros[m])+\
                          " \ncompared to : "+repr(macro(None,co)))
            argl=cmatch(cmacros[m],co)
            if argl :
                # Joining the arguments avoids patching the text afterwards
                # (replacing ',)' by ')'), which also altered any ',)'
                # occurring inside an argument
                rewritten=[ crewrite(arg.buildcrs(crsrewrite=crewrite)) for arg in argl ]
                return m+"("+",".join(rewritten)+")"
    # No macro matches at top level, or top level not wished.
    # Let us dig a bit
    return(co.buildcrs(crsrewrite=crewrite))
def cmatch(macro, cobj):
    """
    Analyze if macro does match cobj, and return the list of objects
    matching macro arguments, ordered by depth-first traversal

    Both objects must be of the same kind (ctree, scriptChild or cpage):
      - ctree : operators must be the same, and the parameters, paired in
        iteration order, must have same names and values; operands are then
        matched pairwise
      - scriptChild : varnames must be the same; matching goes on with the
        fathers
      - cpage : heights, widths and orientation must be the same; figures
        are then matched pairwise

    An operand or figure of MACRO which is a cdummy matches anything, and
    the corresponding part of COBJ is added to the result.

    Returns:
      the list of matched objects, which is empty if there is no match
    """
    clogger.debug("matching " + repr(macro) + " and " + repr(cobj))
    if isinstance(cobj, ctree) and isinstance(macro, ctree) and macro.operator == cobj.operator:
        # NOTE(review): zip stops at the shortest parameters set, hence
        # extra parameters on one side are not detected - confirm this is
        # intended
        for mpara, para in zip(macro.parameters, cobj.parameters):
            if mpara != para or macro.parameters[para] != cobj.parameters[para]:
                return []
        argsub = []
        for mop, op in zip(macro.operands, cobj.operands):
            if isinstance(mop, cdummy):
                argsub.append(op)
            else:
                argsub += cmatch(mop, op)
        return argsub
    elif isinstance(cobj, scriptChild) and isinstance(macro, scriptChild) and macro.varname == cobj.varname:
        # Formerly called with a third, undefined, argument ('argslist'),
        # which raised a NameError
        return cmatch(macro.father, cobj.father)
    elif isinstance(cobj, cpage) and isinstance(macro, cpage):
        argsub = []
        if cobj.heights == macro.heights and cobj.widths == macro.widths and cobj.orientation == macro.orientation:
            for mlines, lines in zip(macro.fig_lines, cobj.fig_lines):
                for mfig, fig in zip(mlines, lines):
                    if isinstance(mfig, cdummy):
                        argsub.append(fig)
                    else:
                        argsub += cmatch(mfig, fig)
        return argsub
    else:
        return []
def cshow(obj) :
    """
    Provide the in-memory value of a CliMAF object, by calling
    climaf.driver.ceval with format 'MaskedArray'.
    For a figure object, this will lead to display it
    ( launch computation if needed. )
    """
    clogger.debug("cshow called on %s"%str(obj))
    value=climaf.driver.ceval(obj,format='MaskedArray')
    return value
def is_derived_variable(variable,project):
    """
    Tell if VARIABLE is declared, in dict derived_variables, either for
    PROJECT or for the wildcard project '*'
    """
    in_project=project in derived_variables and variable in derived_variables[project]
    # The wildcard project is looked at only if needed
    rep=in_project or ("*" in derived_variables and variable in derived_variables["*"])
    clogger.debug("Checking if variable %s is derived for project %s : %s"%(variable,project,rep))
    return rep
def selectCmip5DrsFiles(urls, **kwargs) :
    """
    Return the list of NetCDF files found under the root directories URLS
    and matching the facets provided as keyword arguments : project, model,
    simulation, frequency, variable, realm, table, period, experiment and
    version (all are mandatory)

    Files are looked for using the CMIP5 DRS directory layout. Except for
    frequency 'fx', a file is selected only if the period which shows at
    the end of its name intersects PERIOD.
    """
    # example for path : CMIP5/output1/CNRM-CERFACS/CNRM-CM5/1pctCO2/mon/atmos/
    #      Amon/r1i1p1/v20110701/clivi/clivi_Amon_CNRM-CM5_1pctCO2_r1i1p1_185001-189912.nc
    # second path segment can be any string (allows for : output,output1, merge...),
    # but if 'merge' exists, it is used alone
    # If version is 'last', tries provide version from directory 'last' if available,
    # otherwise those of last dir
    project=kwargs['project']
    model=kwargs['model']
    simulation=kwargs['simulation']
    frequency=kwargs['frequency']
    variable=kwargs['variable']
    realm=kwargs['realm']
    table=kwargs['table']
    period=kwargs['period']
    experiment=kwargs['experiment']
    version=kwargs['version']
    #
    rep=[]
    frequency2drs=dict({'monthly':'mon'})
    freqd=frequency2drs.get(frequency,frequency)
    # Pattern for isolating the period at the end of a filename
    period_regex=r'^.*([0-9]{4}[0-9]{2}-[0-9]{4}[0-9]{2}).nc$'
    # TBD : analyze ambiguity of variable among realms+tables
    for l in urls :
        pattern1=l+"/"+project+"/merge"
        if not os.path.exists(pattern1) :
            pattern1=l+"/"+project+"/*"
        patternv=pattern1+"/*/"+model+"/"+experiment+"/"+freqd+"/"+realm+"/"+table+"/"+simulation
        # Loop on the directories which host version directories
        for repert in glob.glob(patternv) :
            lversions=sorted(os.listdir(repert))
            cversion=version # initial guess of the version to use
            if version == "last" and len(lversions) > 0 :
                if len(lversions) == 1 or "last" not in lversions :
                    # Assume that order provided by sorting is OK
                    cversion=lversions[-1]
                else :
                    cversion="last"
            for f in glob.glob(repert+"/"+cversion+"/"+variable+"/*.nc") :
                if freqd == 'fx' :
                    clogger.debug("adding fixed field "+ f)
                    rep.append(f)
                    continue
                fileperiod=init_period(re.sub(period_regex,r'\1',f))
                if (fileperiod and period.intersects(fileperiod)) :
                    rep.append(f)
    return rep
def hasRawVariable(self) :
    """
    Test local data files to tell if a dataset variable is actually
    included in files (rather than being a derived, virtual variable)

    For the time being, no test is done : this logs a reminder and returns
    False, which leads to always consider that variables declared as
    'derived' actually are derived
    """
    reminder="TBD: actually test variables in files, rather than assuming that variable %s is virtual for dataset %s"
    clogger.debug(reminder%(self.variable,self.crs))
    return False
def is_derived_variable(variable, project):
    """
    Tell if VARIABLE is declared, in dict derived_variables, either for
    PROJECT or for the wildcard project '*'
    """
    # Scopes are tried in order, and search stops at first success
    rep = any(scope in derived_variables and variable in derived_variables[scope]
              for scope in (project, "*"))
    clogger.debug("Checking if variable %s is derived for project %s : %s" %
                  (variable, project, rep))
    return rep
def vertical_average(dat, zmin, zmax):
    """
    Computes a vertical average on the vertical levels between zmin and zmax

    The levels are those returned by getLevs(dat, zmin, zmax), which must
    be a string, as it is spliced in the operator text provided to ccdo.

    Returns:
      the result of ccdo applied to DAT
    """
    levs = getLevs(dat, zmin, zmax)
    clogger.debug(' --> Compute average on the following vertical levels : ' + levs)
    # The levels list must be concatenated to the operator text : it was
    # formerly part of the string literal, hence never provided to cdo.
    # NOTE(review): the enclosing single quotes are kept from the original
    # text - confirm that ccdo expects them
    tmp = ccdo(dat, operator="'vertmean -sellevel," + levs + "'")
    return tmp
def hasExactObject(cobject) :
    """
    Return the name of the file recorded in dict crs2filename for the CRS
    of COBJECT, if that file does exist.

    Returns None if the CRS is not recorded or if the file is missing; in
    the latter case, the entry is removed from crs2filename
    """
    # The index is not read from file here : this is done at startup
    if cobject.crs in crs2filename :
        f=crs2filename[cobject.crs]
        if os.path.exists(f) :
            return f
        # The message formerly showed the text 'cobject.crs', not its value
        clogger.debug("Dropping %s from cache index, because file is missing",cobject.crs)
        crs2filename.pop(cobject.crs)
    return None
def derived_variable(variable,project):
    """
    Returns the entry of dict derived_variables which defines VARIABLE,
    either for PROJECT or, failing that, for the wildcard project '*'.
    Returns None if there is none
    """
    rep=None
    for scope in (project,"*") :
        if scope in derived_variables and variable in derived_variables[scope] :
            rep=derived_variables[scope][variable]
            break
    clogger.debug("Derived variable %s for project %s is %s"%(variable,project,rep))
    return rep
def cexport(*args,**kwargs) :
    """
    Alias for climaf.driver.ceval, which accepts synonyms for arg 'format' :
    'NetCDF', 'netcdf' and 'nc' stand for 'file', and 'MA' stands for
    'MaskedArray'
    """
    clogger.debug("cexport called with arguments"+str(args))
    if "format" in kwargs :
        requested=kwargs['format']
        if requested in ("NetCDF","netcdf","nc") :
            kwargs['format']="file"
        elif requested == "MA" :
            kwargs['format']="MaskedArray"
    return climaf.driver.ceval(*args,**kwargs)
def selectEmFiles(**kwargs):
    """
    Return the list of files which include the data for an EM simulation

    Keyword arguments simulation, frequency, variable, period and realm are
    mandatory. Realm is a single letter, or '*' for looking in all realms
    ('A', 'L', 'O' and 'I').

    For each realm, the data directory is read in file
    $EM_HOME/expe_<simulation>, on the line which begins with
    'export EM_DIRECTORY_<realm><frequency suffix>='. A file of this
    directory is selected if its period (as told by periodOfEmFile)
    intersects PERIOD, and if fileHasVar tells it includes VARIABLE.

    Returns:
      the list of selected files, which is empty if EM_HOME is not set
    """
    # For A and L : mon, day1, day2, 6hLev, 6hPlev, 3h
    simulation = kwargs['simulation']
    frequency = kwargs['frequency']
    variable = kwargs['variable']
    period = kwargs['period']
    realm = kwargs['realm']
    #
    freqs = {"mon": "", "3h": "_3h"}
    f = freqs.get(frequency, frequency)
    rep = []
    em_home = os.getenv("EM_HOME")
    if em_home is None:
        # os.path.expanduser cannot process None
        clogger.error("Environment variable EM_HOME is not set, cannot search files for " + simulation)
        return rep
    expe_file = os.path.expanduser(em_home) + "/expe_" + simulation
    # Must look for all realms, here identified by a single letter
    if realm == "*":
        lrealm = ["A", "L", "O", "I"]
    else:
        lrealm = [realm]
    for realm in lrealm:
        clogger.debug("Looking for realm " + realm)
        # Use EM data for finding data dir
        freq_for_em = f
        if realm == 'I':
            freq_for_em = ""  # This is a special case ...
        command = ["grep", "^export EM_DIRECTORY_" + realm + freq_for_em + "=", expe_file]
        try:
            ex = subprocess.Popen(command, stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE, universal_newlines=True)
            # communicate() drains both pipes, which wait() does not
            output, _ = ex.communicate()
        except Exception:
            clogger.error("Issue getting archive_location for " + simulation +
                          " for realm " + realm + " with: " + repr(command))
            break
        if ex.returncode != 0:
            clogger.info("No archive location found for " + simulation +
                         " for realm " + realm + " with: " + repr(command))
            continue
        # Use only the first matching line, and split at first '=' only,
        # so that a directory name which includes '=' is kept whole
        datadir = output.splitlines()[0].split("=", 1)[1].replace('"', "")
        clogger.debug("Looking at dir " + datadir)
        if not os.path.exists(datadir):
            clogger.error(
                "Directory %s does not exist for simulation %s, realm %s "
                "and frequency %s" % (datadir, simulation, realm, f))
            continue
        for fil in os.listdir(datadir):
            fileperiod = periodOfEmFile(fil, realm, f)
            if fileperiod and period.intersects(fileperiod):
                if fileHasVar(datadir + "/" + fil, variable):
                    rep.append(datadir + "/" + fil)
    return rep
def derived_variable(variable, project):
    """
    Returns the entry of dict derived_variables which defines VARIABLE,
    either for PROJECT or, failing that, for the wildcard project '*'.
    Returns None if there is none
    """
    def declared_in(scope):
        # True if VARIABLE is declared for project SCOPE
        return scope in derived_variables and variable in derived_variables[scope]

    if declared_in(project):
        rep = derived_variables[project][variable]
    elif declared_in("*"):
        rep = derived_variables['*'][variable]
    else:
        rep = None
    clogger.debug("Derived variable %s for project %s is %s" %
                  (variable, project, rep))
    return rep
def generateUniqueFileName_safe(expression, operator=None, format="nc"):
    """
    Generate a filename path from string EXPRESSION and FILEFORMAT, unique
    for the expression and the set of cache directories currently listed in
    cache.cachedirs

    OPERATOR may be a function that provides a prefix, using EXPRESSION

    This uses hashlib.sha224, which digest is truncated, first using
    fileNameLength. More characters (two at a time) are used if the shorter
    name is already in use for another expression in one of the known cache
    directories (as told by searchFile and getCRS)

    The directory structure of generated names is built by stringToPath,
    using directoryNameLength

    Side effects : an existing file which CRS is not yet recorded in
    crs2filename is recorded there; the directory of the returned filename
    is created if needed

    Returns:
      the filename, located under currentCache; an empty string if FORMAT
      is None

    Raises:
      SystemExit if uniqueness is unachievable (quite unexpectable !)
    """
    #
    if format is None:
        return ""
    prefix = ""
    if operator is not None:
        prefix2 = operator(expression)
        if prefix2 is not None:
            prefix = prefix2 + "/"
    # Hashing needs bytes (which Python 2 plain strings are)
    if isinstance(expression, bytes):
        data = expression
    else:
        data = expression.encode("utf-8")
    full = hashlib.sha224(data).hexdigest()

    def relative_path(length):
        # Path, relative to a cache directory, when using LENGTH as 'number'
        return prefix + stringToPath(full[0:length - 1], directoryNameLength) + "." + format

    number = fileNameLength
    existing = searchFile(relative_path(number))
    readCRS = None
    if existing:
        readCRS = getCRS(existing)
        # Update index if needed
        if readCRS not in crs2filename:
            clogger.warning("existing data %s in file %s was not yet registered in cache index" %
                            (readCRS, existing))
            crs2filename[readCRS] = existing
    while (existing is not None) and (readCRS != expression):
        # readCRS is already the CRS of 'existing' : no need to read it again
        clogger.debug("must skip %s which CRS is %s" % (existing, readCRS))
        number += 2
        if number >= len(full):
            # Formerly : a TypeError when building the message (str + int),
            # and a bare 'exit' which did nothing, leading to loop endlessly
            message = "Critical issue in cache : %d digits is not enough for %s" % \
                (len(full), expression)
            clogger.critical(message)
            raise SystemExit(message)
        existing = searchFile(relative_path(number))
        if existing:
            readCRS = getCRS(existing)
    rep = os.path.expanduser(currentCache + "/" + relative_path(number))
    # Create the relevant directory, so that user scripts don't have to care
    dirn = os.path.dirname(rep)
    if not os.path.exists(dirn):
        os.makedirs(dirn)
    clogger.debug("returning %s" % rep)
    return rep
def searchFile(path):
    """
    Search for first occurrence of PATH as a path in all directories
    listed in CACHEDIRS

    Returns the path found (with '~' expanded), or None if there is none.
    If the first occurrence is a broken link, it is removed and search
    stops there, returning None
    """
    for cdir in cachedirs :
        candidate=os.path.expanduser(cdir+"/"+path)
        if not os.path.lexists(candidate) :
            continue
        if os.path.exists(candidate) :
            return candidate
        # This is a broken link : delete it ~ silently
        clogger.debug("Broken link for %s was deleted"%candidate)
        os.remove(candidate)
        return None
    return None
def baseFiles(self,force=False):
    """
    Returns the list of (local) files which include the data for the
    dataset, as provided by dataloc.selectLocalFiles

    The value cached in attribute 'files' is used, unless it is None or
    arg FORCE is True
    """
    if not force and self.files is not None :
        return self.files
    dic=self.kvp.copy()
    if self.alias :
        # With an alias, search uses the name of the variable in files
        filevar,scale,offset,units,filenameVar,missing=self.alias
        dic["variable"]=filevar
        if filenameVar :
            dic["filenameVar"]=filenameVar
    clogger.debug("Looking with dic=%s"%repr(dic))
    self.files=dataloc.selectLocalFiles(**dic)
    return self.files
def searchFile(path):
    """
    Search for first occurrence of PATH as a path in all directories
    listed in CACHEDIRS

    Returns the path found (with '~' expanded), or None if there is none.
    If the first occurrence is a broken link, it is removed and search
    stops there, returning None
    """
    candidates = (os.path.expanduser(cdir + "/" + path) for cdir in cachedirs)
    for candidate in candidates:
        if os.path.lexists(candidate):
            is_broken_link = not os.path.exists(candidate)
            if is_broken_link:
                # Delete it ~ silently and give up
                clogger.debug("Broken link for %s was deleted" % candidate)
                os.remove(candidate)
                return None
            return candidate
    return None
def varOf(cobject) :
    """
    Returns the variable for a CliMAF object :
      - for a dataset : its 'variable' attribute
      - for an ensemble : the variable of its first member
      - for any object with a non-empty attribute 'variable' : its value
      - for a ctree : the variable of its first operand
      - for a cdummy : the string 'dummy'

    Raises Climaf_Driver_Error for other objects
    """
    if isinstance(cobject,classes.cdataset) :
        return cobject.variable
    if isinstance(cobject,classes.cens) :
        return varOf(cobject.members[0])
    if getattr(cobject,"variable",None) :
        return getattr(cobject,"variable",None)
    if isinstance(cobject,classes.ctree) :
        clogger.debug("for now, varOf logic is basic (1st operand) - TBD")
        return varOf(cobject.operands[0])
    if isinstance(cobject,cmacro.cdummy) :
        return "dummy"
    raise Climaf_Driver_Error("Unknown class for argument "+repr(cobject))
def generateUniqueFileName(expression, operator=None, format="nc"):
    """
    Generate a filename path from string EXPRESSION and FILEFORMAT, unique
    for the expression and the set of cache directories currently listed in
    cache.cachedirs

    OPERATOR may be a function that provides a prefix, using EXPRESSION

    This uses hashlib.sha224, which digest is truncated to 3 (or more)
    characters. More characters (two at a time) are used if a shorter name
    is already in use for another expression in one of the known cache
    directories (as told by searchFile and getCRS)

    The directory structure of generated names is built by stringToPath,
    using a directory name length of 2

    Side effects : an existing file which CRS is not yet recorded in
    crs2filename is recorded there; the directory of the returned filename
    is created if needed

    Returns:
      the filename, located under currentCache; an empty string if FORMAT
      is None

    Raises:
      SystemExit if uniqueness is unachievable (quite unexpectable !)
    """
    #
    import hashlib
    directoryNameLength=2
    #
    if format is None :
        return ""
    prefix=""
    if operator is not None :
        prefix2=operator(expression)
        if prefix2 is not None :
            prefix=prefix2+"/"
    # Hashing needs bytes (which Python 2 plain strings are)
    if isinstance(expression,bytes) :
        data=expression
    else :
        data=expression.encode("utf-8")
    full=hashlib.sha224(data).hexdigest()

    def relative_path(length) :
        # Path, relative to a cache directory, when using LENGTH as 'number'
        return prefix+stringToPath(full[0 : length - 1 ], directoryNameLength )+"."+format

    number=4
    existing=searchFile(relative_path(number))
    readCRS=None
    if existing :
        readCRS=getCRS(existing)
        # Update index if needed
        if readCRS not in crs2filename :
            clogger.warning("existing data %s in file %s was not yet registered in cache index"%\
                            (readCRS,existing))
            crs2filename[readCRS]=existing
    while ( existing is not None ) and ( readCRS != expression ) :
        # readCRS is already the CRS of 'existing' : no need to read it again
        clogger.debug("must skip %s which CRS is %s"%(existing, readCRS))
        number += 2
        if number >= len(full) :
            # Formerly : a TypeError when building the message (str + int),
            # and a bare 'exit' which did nothing, leading to loop endlessly
            message="Critical issue in cache : %d digits is not enough for %s"%\
                (len(full),expression)
            clogger.critical(message)
            raise SystemExit(message)
        existing=searchFile(relative_path(number))
        if existing :
            readCRS=getCRS(existing)
    rep=os.path.expanduser(currentCache+"/"+relative_path(number))
    # Create the relevant directory, so that user scripts don't have to care
    dirn=os.path.dirname(rep)
    if not os.path.exists(dirn) :
        os.makedirs(dirn)
    clogger.debug("returning %s"%rep)
    return rep
def cfilePage(cobj, deep, recurse_list=None) :
    """
    Builds a page with CliMAF figures, computing associated crs

    Each figure of the page is evaluated as a file (using ceval), and all
    figures are assembled using command 'convert'. An empty figure slot is
    left blank.

    Args:
      cobj (cpage object) : the page to build
      deep : provided as is to ceval, when evaluating each figure
      recurse_list (optional) : provided as is to ceval

    Returns : the filename in CliMAF cache, which contains the result
      (and None if registering it in cache fails)

    Raises : Climaf_Driver_Error if cobj is not a cpage, if its orientation
      is neither 'portrait' nor 'landscape', or if compositing fails
    """
    if not isinstance(cobj,classes.cpage):
        raise Climaf_Driver_Error("cobj is not a cpage object")
    clogger.debug("Computing figure array for cpage %s"%(cobj.crs))
    #
    # page size and creation
    if cobj.orientation == "portrait":
        page_width=800. ; page_height=1200.
    elif cobj.orientation == "landscape":
        page_width=1200. ; page_height=800.
    else :
        # Formerly led to a NameError on page_width
        raise Climaf_Driver_Error("Unknown orientation %s for cpage %s"%
                                  (cobj.orientation,cobj.crs))
    page_size="%dx%d"%(page_width, page_height)
    args=["convert", "-size", page_size, "xc:white"]
    #
    # margins
    x_left_margin=10.  # Left shift at start and end of line
    y_top_margin=10.   # Initial vertical shift for first line
    x_right_margin=10. # Right shift at start and end of line
    y_bot_margin=10.   # Vertical shift for last line
    xmargin=20.        # Horizontal shift between figures
    ymargin=20.        # Vertical shift between figures
    #
    usable_height=page_height-ymargin*(len(cobj.heights)-1.)-y_top_margin -y_bot_margin
    usable_width =page_width -xmargin*(len(cobj.widths)-1.) -x_left_margin-x_right_margin
    #
    # page composition
    y=y_top_margin
    for line, rheight in zip(cobj.fig_lines, cobj.heights) :
        # Line height in pixels
        height=usable_height*rheight
        x=x_left_margin
        for fig, rwidth in zip(line, cobj.widths) :
            # Figure width in pixels
            width=usable_width*rwidth
            scaling="%dx%d+%d+%d" %(width,height,x,y)
            if fig :
                figfile=ceval(fig,format="file", deep=deep, recurse_list=recurse_list)
            else :
                figfile='xc:None'
            clogger.debug("Compositing figure %s",fig.crs if fig else 'None')
            args.extend([figfile , "-geometry", scaling, "-composite" ])
            x+=width+xmargin
        y+=height+ymargin
    out_fig=cache.generateUniqueFileName(cobj.buildcrs(), format="png")
    args.append(out_fig)
    clogger.debug("Compositing figures : %s"%repr(args))
    comm=subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes, which wait() does not
    _, errors=comm.communicate()
    if comm.returncode != 0 :
        raise Climaf_Driver_Error("Compositing failed : %s" %errors)
    if cache.register(out_fig,cobj.crs) :
        clogger.debug("Registering file %s for cpage %s"%(out_fig,cobj.crs))
        return out_fig
    return None
def selectEmFiles(**kwargs) :
    """
    Return the list of files which include the data for an EM simulation

    Keyword arguments simulation, frequency, variable, period and realm are
    mandatory. Realm is a single letter, or '*' for looking in all realms
    ('A', 'L', 'O' and 'I').

    For each realm, the data directory is read in file
    $EM_HOME/expe_<simulation>, on the line which begins with
    'export EM_DIRECTORY_<realm><frequency suffix>='. A file of this
    directory is selected if its period (as told by periodOfEmFile)
    intersects PERIOD, and if fileHasVar tells it includes VARIABLE.

    Returns:
      the list of selected files, which is empty if EM_HOME is not set
    """
    # For A and L : mon, day1, day2, 6hLev, 6hPlev, 3h
    simulation=kwargs['simulation']
    frequency=kwargs['frequency']
    variable=kwargs['variable']
    period=kwargs['period']
    realm=kwargs['realm']
    #
    freqs={ "mon" : "" , "3h" : "_3h"}
    f=freqs.get(frequency,frequency)
    rep=[]
    em_home=os.getenv("EM_HOME")
    if em_home is None :
        # os.path.expanduser cannot process None
        clogger.error("Environment variable EM_HOME is not set, cannot search files for "+simulation)
        return rep
    expe_file=os.path.expanduser(em_home)+"/expe_"+simulation
    # Must look for all realms, here identified by a single letter
    if realm=="*" :
        lrealm=["A", "L", "O", "I" ]
    else:
        lrealm=[ realm ]
    for realm in lrealm :
        clogger.debug("Looking for realm "+realm)
        # Use EM data for finding data dir
        freq_for_em=f
        if realm == 'I' :
            freq_for_em=""  # This is a special case ...
        command=["grep", "^export EM_DIRECTORY_"+realm+freq_for_em+"=", expe_file ]
        try :
            ex=subprocess.Popen(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, universal_newlines=True)
            # communicate() drains both pipes, which wait() does not
            output,_=ex.communicate()
        except Exception :
            clogger.error("Issue getting archive_location for "+
                          simulation+" for realm "+realm+" with: "+repr(command))
            break
        if ex.returncode != 0 :
            clogger.info("No archive location found for "+
                         simulation+" for realm "+realm+" with: "+repr(command))
            continue
        # Use only the first matching line, and split at first '=' only,
        # so that a directory name which includes '=' is kept whole
        datadir=output.splitlines()[0].split("=",1)[1].replace('"',"")
        clogger.debug("Looking at dir "+datadir)
        if not os.path.exists(datadir) :
            clogger.error("Directory %s does not exist for EM simulation %s, realm %s "
                          "and frequency %s"%(datadir,simulation,realm,f))
            continue
        for fil in os.listdir(datadir) :
            fileperiod=periodOfEmFile(fil,realm,f)
            if fileperiod and period.intersects(fileperiod) :
                if fileHasVar(datadir+"/"+fil,variable) :
                    rep.append(datadir+"/"+fil)
    return rep
def cMA(obj,deep=None) :
    """
    Provide the Masked Array value for a CliMAF object, by calling
    climaf.driver.ceval with format 'MaskedArray'. Launch computation if
    needed.

    Args:
      obj (CliMAF object) : either a dataset or a 'compound' object (like
        the result of a CliMAF standard operator)
      deep (logical, optional) : governs the use of cached values when
        computing the object

        - if missing, or None : use cache as much as possible
        - False : make a shallow computation, i.e. do not use cached values
          for top level operation
        - True  : make a deep computation, i.e. do not use any cached value

    Returns:
      a Masked Array containing the object's value
    """
    clogger.debug("cMA called with arguments%s"%str(obj))
    value=climaf.driver.ceval(obj,format='MaskedArray',deep=deep)
    return value
def selectExampleFiles(urls,**kwargs) :
    """
    Return the list of files found in sub-directories 'A' and 'L' of the
    directories URLS, which period (as told by periodOfEmFile, for
    frequency 'mon') intersects kwargs['period'], and for which fileHasVar
    tells they include kwargs['variable']

    The list is empty unless kwargs['frequency'] is 'monthly'
    """
    rep=[]
    if kwargs['frequency'] != "monthly" :
        return rep
    for l in urls :
        for realm in ["A","L"] :
            datadir=l+"/"+realm
            clogger.debug("Looking at dir "+datadir)
            if not os.path.exists(datadir) :
                continue
            for f in os.listdir(datadir) :
                clogger.debug("Looking at file "+f)
                fileperiod=periodOfEmFile(f,realm,'mon')
                if fileperiod and fileperiod.intersects(kwargs['period']) \
                   and fileHasVar(datadir+"/"+f,kwargs['variable']) :
                    rep.append(datadir+"/"+f)
    return rep
def attributeOf(cobject,attrib) :
    """
    Returns the value of attribute ATTRIB for a CliMAF object :
      - for a dataset : its attribute of that name or, if it is None, the
        entry of that name in its 'kvp' dict
      - for an ensemble : the attribute of its first member
      - for any object with a non-empty attribute of that name : its value
      - for a ctree : the attribute of its first operand
      - for a cdummy : the string 'dummy'
      - for a cpage : None
      - for None : an empty string

    Raises Climaf_Driver_Error for other objects
    """
    if isinstance(cobject,classes.cdataset) :
        val=getattr(cobject,attrib,None)
        if val is None :
            val=cobject.kvp.get(attrib)
        return val
    if isinstance(cobject,classes.cens) :
        return attributeOf(cobject.members[0],attrib)
    if getattr(cobject,attrib,None) :
        return getattr(cobject,attrib)
    if isinstance(cobject,classes.ctree) :
        clogger.debug("for now, varOf logic is basic (1st operand) - TBD")
        return attributeOf(cobject.operands[0],attrib)
    if isinstance(cobject,cmacro.cdummy) :
        return "dummy"
    if isinstance(cobject,classes.cpage) :
        return None
    if cobject is None :
        return ''
    raise Climaf_Driver_Error("Unknown class for argument "+repr(cobject))
def read(filename):
    """
    Read macro dictionary from filename, and add it to cmacros[]

    The file content must be JSON. Function macro is called for each
    entry, with the entry name and value, both converted to strings.
    If the file cannot be read or decoded, this is just logged (at
    level 'info')
    """
    import json
    global cmacros
    macros_texts = None
    try:
        # The 'with' statement closes the file even if decoding fails
        with open(os.path.expanduser(filename), "r") as macrofile:
            clogger.debug("Macrofile %s read" % (macrofile))
            macros_texts = json.load(macrofile)
            clogger.debug("After reading file %s, macros=%s" %
                          (macrofile, repr(macros_texts)))
    except Exception:
        clogger.info("Issue reading macro file %s ", filename)
    if macros_texts:
        for m in macros_texts:
            clogger.debug("loading macro %s=%s" % (m, macros_texts[m]))
            macro(str(m), str(macros_texts[m]))
def ceval_select(includer,included,userflags,format,deep,derived_list,recurse_list) :
    """
    Extract object INCLUDED from (existing) object INCLUDER, taking into
    account the capability of the user process (USERFLAGS) and the
    required delivering FORMAT (file or object)

    Only format 'file' is processed yet : operator 'select' is applied to
    INCLUDER for the time period of INCLUDED, and the result is evaluated
    as a file, which is renamed in cache (using cache.rename) according
    to the CRS of INCLUDER for that period.

    Returns:
      the value returned by cache.rename; None (after logging a message)
      for other formats or if evaluation fails
    """
    if format != 'file' :
        clogger.error("Can yet process only files - TBD")
        return None
    if userflags.canSelectTime or userflags.canSelectDomain:
        clogger.debug("TBD - should do smthg smart when user can select time or domain")
    incperiod=timePeriod(included)
    period_text=repr(incperiod)
    clogger.debug("extract sub period %s out of %s"%(period_text,includer.crs))
    extract=capply('select',includer, period=repr(incperiod))
    objfile=ceval(extract,userflags,'file',deep,derived_list,recurse_list)
    if not objfile :
        clogger.critical("Cannot evaluate "+repr(extract))
        return None
    crs=includer.buildcrs(period=incperiod)
    return cache.rename(objfile,crs)
def getCRS(filename) :
    """
    Returns the CRS expression found in FILENAME's meta-data

    The value of attribute 'CRS_def' is read using ncdump for a file which
    name ends with '.nc', and using identify for one which name ends with
    '.png'.

    Returns:
      the CRS expression (an empty string if none is found, which is
      logged as an error); the string 'failed' if the command fails;
      None if the file type is not handled
    """
    import subprocess
    is_netcdf=filename.endswith(".nc")
    if is_netcdf :
        form='ncdump -h %s | grep -E "CRS_def *=" | '+\
            'sed -r -e "s/.*:CRS_def *= *\\\"(.*)\\\" *;$/\\1/" '
    elif filename.endswith(".png") :
        form='identify -verbose %s | grep -E " *CRS_def: " | sed -r -e "s/.*CRS_def: *//"'
    else :
        clogger.critical("unknown filetype for %s"%filename)
        return None
    # NOTE(review): filename is interpolated, unquoted, in a shell command
    command=form%filename
    try:
        # universal_newlines=True provides text (rather than bytes) output
        # whatever the Python version
        rep=subprocess.check_output(command, shell=True,
                                    universal_newlines=True).replace('\n','')
        if rep == "" :
            clogger.error("file %s is not well formed (no CRS)"%filename)
        if is_netcdf :
            # Remove the backslash which shows before quotes in ncdump output
            rep=rep.replace(r"\'",r"'")
    except Exception :
        rep="failed"
    clogger.debug("CRS expression read in %s is %s"%(filename,rep))
    return rep
def capply_script (script_name, *operands, **parameters):
    """
    Create object for application of a script to OPERANDS with keyword
    PARAMETERS.

    If first operand is an ensemble (classes.cens) and the script flags
    tell it commutes with ensembles, the script is applied to each member
    (with additional parameter 'member_label'), and an ensemble of the
    results is returned. Otherwise, returns the result of maketree.

    Raises:
      Climaf_Driver_Error if the script is not known, if the number of
      operands does not fit the script declaration, if a parameter is not
      expected by the script command, or if an operand other than the
      first one is an ensemble
    """
    if script_name not in operators.scripts :
        # The script name was formerly provided as a second argument to the
        # exception, hence never included in the message
        raise Climaf_Driver_Error("Script %s is not know. Consider declaring it "
                                  "with function 'cscript'"%script_name)
    script=operators.scripts[script_name]
    if len(operands) != script.inputs_number() :
        raise Climaf_Driver_Error("Operator %s is "
                                  "declared with %d input streams, while you provided %d. Get doc with 'help(%s)'"%(
                script_name,script.inputs_number(),len(operands), script_name ))
    #
    # Check that all parameters to the call are expected by the script
    for para in parameters :
        if para == 'member_label' :
            continue
        if re.match(r".*\{"+para+r"\}",script.command) is None and \
           re.match(r".*\{"+para+r"_iso\}",script.command) is None :
            raise Climaf_Driver_Error("parameter '%s' is not expected by script %s"
                                      "(which command is : %s)"%(para,script_name,script.command))
    #
    # Check that only first operand can be an ensemble.
    # Slicing also works for a script declared with no input, a case which
    # formerly raised an IndexError
    opscopy=list(operands[1:])
    for op in opscopy :
        if isinstance(op,classes.cens ):
            raise Climaf_Driver_Error("Cannot yet have an ensemble as operand except as first one")
    #
    if operands and isinstance(operands[0],classes.cens) and script.flags.commuteWithEnsemble :
        # Must iterate on members
        reps=[]
        for member,label in zip(operands[0].members,operands[0].labels) :
            clogger.debug("processing member "+repr(member))
            params=parameters.copy()
            params["member_label"]=label
            reps.append(maketree(script_name, script, member, *opscopy, **params))
        return(classes.cens(operands[0].labels,*reps))
    else:
        return(maketree(script_name, script, *operands, **parameters))
def generateUniqueFileName_unsafe(expression, format="nc"):
    """
    Generate a filename path from string EXPRESSION and FILEFORMAT, almost
    unique for the expression and the cache directory

    This uses hashlib.sha224, which digest is truncated using
    fileNameLength. The directory structure of generated names is built by
    stringToPath, using directoryNameLength

    No check is done for a clash with another expression. The directory of
    the returned filename is created if needed.

    Returns:
      the filename, located under currentCache; an empty string if FORMAT
      is None
    """
    #
    if format == None:
        return ""
    digest = hashlib.sha224(expression).hexdigest()
    truncated = digest[0:fileNameLength - 1]
    relative = stringToPath(truncated, directoryNameLength) + "." + format
    rep = os.path.expanduser(currentCache + "/" + relative)
    # Create the relevant directory, so that user scripts don't have to care
    dirn = os.path.dirname(rep)
    if not os.path.exists(dirn):
        os.makedirs(dirn)
    clogger.debug("returning %s" % rep)
    return rep
def crewrite(crs, alsoAtTop=True):
    """
    Return the crs expression with sub-trees replaced by macro equivalent
    when applicable

    Search order is : from CRS tree root try all macros, then do the same
    for first subtree, and recursively in depth, and then go to second
    subtree

    Args:
      crs (str): the expression; it is evaluated in the namespace of module
        ``__main__``
      alsoAtTop (bool): if False, macros are not tried at the root of the
        tree, but only on sub-trees

    Returns:
      the rewritten expression (str); CRS itself if it cannot be evaluated,
      or if it does not evaluate to a ctree, scriptChild or cpage
    """
    main_namespace = sys.modules['__main__'].__dict__
    # Next line used for interpreting macros's CRS
    exec("ARG=climaf.cmacro.cdummy()", main_namespace)
    #
    allow_error_on_ds()
    try:
        co = eval(crs, main_namespace)
    except Exception:
        clogger.debug("Issue when rewriting %s" % crs)
        return crs
    finally:
        # Must be reset even when the evaluation fails, otherwise the
        # setting made just above stays active after this call
        allow_error_on_ds(False)
    if not isinstance(co, (ctree, scriptChild, cpage)):
        return crs
    if alsoAtTop:
        for m in cmacros:
            clogger.debug("looking at macro : " + m + "=" + repr(cmacros[m]) +
                          " \ncompared to : " + repr(macro(None, co)))
            argl = cmatch(cmacros[m], co)
            if len(argl) > 0:
                # Joining the arguments avoids altering any ',)' sequence
                # which would show in the text of an argument
                rewritten = [crewrite(arg.buildcrs(crsrewrite=crewrite))
                             for arg in argl]
                return m + "(" + ",".join(rewritten) + ")"
    # No macro matches at top level, or top level not wished.
    # Let us dig a bit
    return co.buildcrs(crsrewrite=crewrite)
def eds(**kwargs):
    """
    Create a dataset ensemble using the same calling sequence as
    :py:func:`~climaf.classes.cdataset`, except that one of the facets
    is a list, for defining the ensemble members; this facet must be among
    the facets authorized for ensemble in the (single) project involved

    Example::

    >>> cdef("frequency","monthly") ;  cdef("project","CMIP5"); cdef("model","CNRM-CM5")
    >>> cdef("variable","tas"); cdef("period","1860")
    >>> ens=eds(experiment="historical", simulation=["r1i1p1","r2i1p1"])

    Raises:
      Climaf_Classes_Error: if a list-valued facet is not authorized for
        ensembles in the project, or if the number of list-valued facets
        (not counting 'domain') is not exactly one
    """
    facets = processDatasetArgs(**kwargs)
    # Collect the facets which value is a list; 'domain' is not considered,
    # whatever its type
    ensemble_facets = []
    for facet in facets:
        clogger.debug("Looking at attr %s for ensemble"%facet)
        if not isinstance(facets[facet], list) or facet == "domain":
            continue
        if facet not in cprojects[facets["project"]].attributes_for_ensemble:
            raise Climaf_Classes_Error("Attribute %s cannot be used for ensemble"%facet)
        clogger.debug("Attr %s is used for an ensemble"%facet)
        ensemble_facets.append(facet)
    if len(ensemble_facets) != 1:
        raise Climaf_Classes_Error("Must ask for an ensemble on exactly one attribute")
    listattr = ensemble_facets[0]
    #
    # One dataset per value of the list, labelled by this value
    labels = list(facets[listattr])
    members = []
    for label in labels:
        member_facets = facets.copy()
        member_facets[listattr] = label
        members.append(cdataset(**member_facets))
    return cens(labels, *members)
def cread(datafile, varname=None):
    """
    Read or display the content of DATAFILE, depending on its suffix

    - '.png' : the file is handed to program ``display``, without waiting
      for it; returns None
    - '.nc' : returns variable VARNAME as a numpy masked array, masked where
      data equals the variable's ``_FillValue`` (not masked if the variable
      has no such attribute); if VARNAME is None, it is provided by
      ``varOfFile``, and None is returned if that one returns None
    - otherwise, logs an error and returns None

    Returns None if DATAFILE is None or empty.
    """
    if not datafile:
        return None
    if datafile.endswith(".png"):
        # Popen neither uses a shell nor waits for the command : a trailing
        # '&' would just be handed to 'display' as another file name
        subprocess.Popen(["display", datafile])
        return None
    if datafile.endswith(".nc"):
        clogger.debug("reading NetCDF file %s" % datafile)
        if varname is None:
            varname = varOfFile(datafile)
        if varname is None:
            return None
        from Scientific.IO.NetCDF import NetCDFFile as ncf
        import numpy.ma
        fileobj = ncf(datafile)
        try:
            variable = fileobj.variables[varname]
            # Note taken from the CDOpy developper : .data is not backwards
            # compatible to old scipy versions, [:] is
            data = variable[:]
            fillv = getattr(variable, "_FillValue", None)
            if fillv is None:
                rep = numpy.ma.array(data)
            else:
                rep = numpy.ma.array(data, mask=(data == fillv))
        finally:
            # Close the file even if reading the variable fails
            fileobj.close()
        return rep
    clogger.error("cannot yet handle %s" % datafile)
    return None
def cimport(cobject, crs):
    """
    Import COBJECT in the cache, as the value of expression CRS

    If COBJECT is a string, it is passed together with CRS to
    ``cache.register``. Masked arrays are not yet handled : an error is
    logged; the same applies for any other type.

    The syntax of CRS is not checked (TBD). Always returns None.
    """
    # The argument needs a placeholder in the message, otherwise the logging
    # module fails when formatting the record
    clogger.debug("cimport called with argument %s", cobject)
    clogger.debug("should check syntax of arg 'crs' -TBD")
    clogger.warning("cimport is not for the dummies - Playing at your own risks !")
    import numpy.ma
    if isinstance(cobject, numpy.ma.MaskedArray):
        clogger.debug("for now, use a file for importing - should revisit - TBD")
        clogger.error("not yet implemented fro Masked Arrays - TBD")
    elif isinstance(cobject, str):
        cache.register(cobject, crs)
    else:
        clogger.error("argument is not a Masked Array nor a filename : %s", cobject)
def store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards,
                                merge_periods_on=None, fperiod=None,
                                periods=None, periods_dict=None):
    """
    Using a (groups-capable) regexp FACETS_REGEXP for finding facet values,
    analyze string F for finding the value of each keyword (facet name) in
    KWARGS, and stores it in dict WILDCARDS, which keys are facet names and
    values are set of encountered values

    Only the facets which value in KWARGS is a string including '*' or '?'
    are processed; facets which are not a group name of FACETS_REGEXP are
    skipped.

    Regarding periods : if both FPERIOD and PERIODS are provided, FPERIOD is
    appended to list PERIODS; furthermore, for each match of a wildcard
    facet, FPERIOD is added to a set in dict PERIODS_DICT (if provided),
    which key is None if MERGE_PERIODS_ON is None, and otherwise is the
    value found for the facet named MERGE_PERIODS_ON (the other facets
    being then skipped)

    Nothing is returned : WILDCARDS, PERIODS and PERIODS_DICT are updated
    in place.
    """
    # NOTE(review): the nesting of the period section (inside the wildcard
    # test) was rebuilt from a source with lost indentation - confirm
    with_period = fperiod is not None and periods is not None
    if with_period:
        clogger.debug('Adding period %s' % fperiod)
        periods.append(fperiod)
    #
    # The matches do not depend on the facet : search for them only once
    matches = list(re.finditer(facets_regexp, f))
    for kw in kwargs:
        value = kwargs[kw]
        is_wildcard = isinstance(value, str) and ("*" in value or "?" in value)
        for oc in matches:
            try:
                facet_value = oc.group(kw)
            except IndexError:
                # kw is not the name of a group in facets_regexp
                continue
            if not is_wildcard:
                continue
            if facet_value is not None:
                wildcards.setdefault(kw, set()).add(facet_value)
                clogger.debug("Discover %s=%s for file=%s" % (kw, facet_value, f))
            else:
                clogger.debug("Logic issue for kw=%s and file=%s" % (kw, f))
            #
            # periods_dict is optional : without it, there is nowhere to
            # record the period
            if with_period and periods_dict is not None:
                if merge_periods_on is None:
                    key = None
                elif kw == merge_periods_on:
                    key = facet_value
                else:
                    continue
                periods_dict.setdefault(key, set()).add(fperiod)
def timePeriod(cobject):
    """
    Returns a time period for a CliMAF object : the period of the dataset
    which is reached when going down from COBJECT through first operands
    (for a ctree), fathers (for a scriptChild) and first members (for an
    ensemble).

    Logs an error and returns None if an object of another class is met.
    """
    current = cobject
    while True:
        if isinstance(current, classes.cdataset):
            return current.period
        if isinstance(current, classes.ctree):
            clogger.debug("for now, timePeriod logic for scripts output is basic (1st operand) - TBD")
            current = current.operands[0]
        elif isinstance(current, classes.scriptChild):
            clogger.debug("for now, timePeriod logic for scriptChilds is basic - TBD")
            current = current.father
        elif isinstance(current, classes.cens):
            clogger.debug("for now, timePeriod logic for 'cens' objet is basic (1st member)- TBD")
            current = current.members[0]
        else:
            clogger.error("unkown class for argument " + repr(current))
            return None
def selectFiles(return_wildcards=None, merge_periods_on=None, **kwargs):
    """
    Returns the shortest list of (local or remote) files which include
    the data for the list of (facet,value) pairs provided

    Method :

    - use datalocations indexed by :py:func:`~climaf.dataloc.dataloc` to
      identify data organization and data store urls for these
      (facet,value) pairs

    - check that data organization is as known one, i.e. is one of 'generic',
      'CMIP5_DRS' or 'EM'

    - derive relevant filenames search function such as
      :py:func:`~climaf.dataloc.selectCmip5DrsFiles` from data
      organization scheme

    - pass urls and relevant facet values to this filenames search function

    Args:
      return_wildcards, merge_periods_on : passed as is to
        ``selectGenericFiles``; RETURN_WILDCARDS can be used only with data
        locations which organization is 'generic'
      **kwargs : the facets; 'project' and 'simulation' are mandatory;
        'model' and 'frequency' default to '*' when searching for data
        locations

    Returns:
      a single string with the sorted, distinct filenames separated by a
      space; None if there is no relevant data location or if no file is
      found

    Raises:
      classes.Climaf_Error: for an unknown organization, or if
        RETURN_WILDCARDS is provided for an organization other than
        'generic'
    """
    rep = []
    project = kwargs['project']
    simulation = kwargs['simulation']
    model = kwargs.get('model', "*")
    frequency = kwargs.get('frequency', "*")

    ofu = getlocs(project=project, model=model, simulation=simulation,
                  frequency=frequency)
    clogger.debug("locs=" + repr(ofu))
    if len(ofu) == 0:
        clogger.warning("no datalocation found for %s %s %s %s " %
                        (project, model, simulation, frequency))
    for org, freq, urls in ofu:
        # Strings must be compared by value : comparing identities only
        # works when both strings happen to be the same object
        if return_wildcards is not None and org != "generic":
            raise classes.Climaf_Error(
                "Can handle multipe facet query only for organization=generic "
            )
        kwargs2 = kwargs.copy()
        # Convert normalized frequency to project-specific frequency if applicable
        if "frequency" in kwargs and project in classes.frequencies:
            normfreq = kwargs2['frequency']
            if normfreq in classes.frequencies[project]:
                kwargs2['frequency'] = classes.frequencies[project][normfreq]
        # Convert normalized realm to project-specific realm if applicable
        if "realm" in kwargs and project in classes.realms:
            normrealm = kwargs2['realm']
            if normrealm in classes.realms[project]:
                kwargs2['realm'] = classes.realms[project][normrealm]
        #
        # Call organization-specific routine
        if org == "EM":
            rep.extend(selectEmFiles(**kwargs2))
        elif org == "CMIP5_DRS":
            rep.extend(selectCmip5DrsFiles(urls, **kwargs2))
        elif org == "generic":
            rep.extend(
                selectGenericFiles(urls,
                                   return_wildcards=return_wildcards,
                                   merge_periods_on=merge_periods_on,
                                   **kwargs2))
        else:
            raise classes.Climaf_Error("Cannot process organization " + org +
                                       " for simulation " + simulation +
                                       " and model " + model + " of project " +
                                       project)
    if not ofu:
        return None
    if len(rep) == 0:
        # 'urls' refers here to the last data location processed
        clogger.warning("no file found for %s, at these "
                        "data locations %s " % (repr(kwargs), repr(urls)))
        empty_facets = [k for k in kwargs if kwargs[k] == '']
        if empty_facets:
            clogger.warning("Please check these empty attributes %s" %
                            empty_facets)
        return None
    # Discard duplicates (assumes that sorting is harmless for later
    # processing). A new list is built, because removing entries from a list
    # while iterating on it skips some of them, which left duplicates as
    # soon as a filename showed more than twice
    unique_files = sorted(set(rep))
    # Assemble filenames in one single string
    return " ".join(unique_files)
def __init__(self, name, command, format="nc", canOpendap=False,
             commuteWithTimeConcatenation=False,
             commuteWithSpaceConcatenation=False,
             canSelectVar=False, **kwargs):
    """
    Declare a script or binary as a 'CliMAF operator', and define a
    Python function with the same name

    Args:
      name (str): name for the CliMAF operator.
      command (str): script calling sequence, according to the syntax
        described below.
      format (str): script outputs format -- either 'nc', 'png', 'pdf',
        'eps', 'None' or 'graph' ('graph' allows to the user to choose
        three different graphic output formats: 'png', 'pdf' or 'eps')
        or 'txt' (the text output are not managed by CliMAF, but only
        displayed - 'txt' allows to use e.g. 'ncdump -h' from inside
        CliMAF); defaults to 'nc'
      canOpendap (bool, optional): is the script able to use OpenDAP URIs ?
        default to False
      commuteWithTimeConcatenation (bool, optional): can the operation
        commute with concatenation of time periods ? set it to true, if
        the operator can be applied on time chunks separately, in order
        to allow for incremental computation / time chunking;
        defaults to False
      commuteWithSpaceConcatenation (bool, optional): can the operation
        commute with concatenation of space domains ? defaults to False
        (see commuteWithTimeConcatenation)
      canSelectVar (bool, optional): value for the flag of the same name;
        the flag is also set when the command includes ``${var}`` (not in
        first position); defaults to False
      **kwargs : possible keyword arguments, with keys matching
        '<outname>_var', for providing a format string allowing to compute
        the variable name for output 'outname' (see below).

    Returns:
      None

    Raises:
      Climaf_Operator_Error: if an operand other than the first one is
        declared as accepting members, if an operand is declared as
        accepting both members and a set of files, or if the format is
        not an allowed one

    The script calling sequence pattern string (arg 'command') indicates
    how to build the system call which actually launches the script, with
    a match between python objects and formal arguments;

    For introducing the syntax, please consider this example, with the
    following commands::

    >>> cscript('mycdo','cdo ${operator} ${in} ${out}')
    >>> # define some dataset
    >>> tas_ds = ds(project='example', simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
    >>> # Apply operator 'mycdo' to dataset 'tas_ds', choosing a given 'operator' argument
    >>> tas_avg = mycdo(tas_ds,operator='timavg')

    CliMAF will later on launch this call behind the curtain::

    $ cdo timavg /home/my/tmp/climaf_cache/8a/5.nc /home/my/tmp/climaf_cache/4e/4.nc

    where :

    - the last filename is generated by CliMAF from the formal expression
      describing 'tas_avg', and will receive the result
    - the first filename provides a file generated by CliMAF which
      includes the data required for tas_ds

    There are a number of examples declared in module
    :download:`standard_operators <../climaf/standard_operators.py>`.

    **Detailed syntax**:

    - formal arguments appear as : ``${argument}`` (in the example :
      ``${in}``, ``${out}``, ``${operator}`` )

    - except for reserved keywords, arguments in the pattern will be
      replaced by the values for corresponding keywords used when invoking
      the diagnostic operator:

      - in the example above : argument ``operator`` is replaced by value
        ``timavg``, which is a keyword known to the external binary
        called, CDO

    - reserved argument keywords are :

     - **in, in_<digit>, ins, ins_<digit>, mmin** : they will be
       replaced by CliMAF managed filenames for input data, as
       deduced from dataset description or upstream computation; these
       filenames can actually be remote URLs (if the script can use
       OpenDAP, see args), local 'raw' data files, or CliMAF cache
       filenames

      - **in** stands for the URL of the first dataset invoked in the
        operator call

      - **in_<digit>** stands for the next ones, in the same order

      - **ins** and **ins_<digit>** stand for the case where the script
        can select input from multiple input files or URLs (e.g. when the
        whole period to process spans over multiple files); in that case,
        a single string (surrounded with double quotes) will carry
        multiple URLs

      - **mmin** stands for the case where the script accepts an
        ensemble of datasets (only for first input stream yet). CliMAF
        will replace the keyword by a string composed of the
        corresponding input filenames (not surrounded by quotes - please
        add them yourself in declaration); see also ``labels`` below

     - **var, var_<digit>** : when a script can select a variable in a
       multi-variable input stream, this is declared by adding this
       keyword in the calling sequence; CliMAF will replace it by the
       actual variable name to process; 'var' stands for first input
       stream, 'var_<digit>' for the next ones;

       - in the example above, we assume that external binary CDO is
         not tasked with selecting the variable, and that CliMAF must
         feed CDO with a datafile where it has already performed the
         selection

     - **period, period_<digit>** : when a script can select a time
       period in the content of a file or stream, it should declare it
       by putting this keyword in the pattern, which will be replaced at
       call time by the period written as <date1>-<date2>, where date is
       formated as YYYYMMDD ;

       - time intervals must be interpreted as [date1, date2[

       - 'period' stands for the first input_stream,

       - 'period_<n>' for the next ones, in the order of actual call;

       - in the example above, this keyword is not used, which means that
         CliMAF has to select the period upstream of feeding CDO with the
         data

     - **period_iso, period_iso_<digit>** : as for **period** above,
       except that the date formating fits CDO conventions :

       - date format is ISO : YYYY-MM-DDTHH:MM:SS

       - interval is [date1,date2_iso], where date2_iso is 1 minute before
         date2

       - separator between dates is : ,

     - **domain, domain_<digit>** : when a script can select a domain
       in the input grid, this is declared by adding this
       keyword in the calling sequence; CliMAF will replace it by the
       domain definition if needed, as 'latmin,latmax,lonmin,lonmax' ;
       'domain' stands for first input stream, 'domain_<digit>' for the
       next ones :

       - in the example above, we assume that external binary CDO is
         not tasked with selecting the domain, and that CliMAF must
         feed CDO with a datafile where it has already performed the
         selection

     - **out, out_<word>** : CliMAF provide file names for output
       files (if there is no such field, the script will have
       only 'side effects', e.g. launch a viewer). Main output
       file must be created by the script with the name provided
       at the location of argument ${out}. Using arguments like
       'out_<word>' tells CliMAF that the script provide some
       secondary output, which will be symbolically known in
       CliMAF syntax as an attribute of the main object; by
       default, the variable name of each output equals the name
       of the output (except for the main ouput, which variable
       name is supposed to be the same as for the first input);
       for other cases, see argument \*\*kwargs to provide a
       format string, used to derive the variable name from first
       input variable name as in e.g. :
       ``output2_var='std_dev(%s)'`` for the output labelled
       output2 (i.e. declared as '${out_output2}') or ``_var='std_dev(%s)'``
       for the default (main) output

       - in the example above, we just apply the convention used by CDO,
         which expects that you provide an output filename as last
         argument on the command line. See example mean_and_sdev in doc
         for advanced usage.

     - **crs** : will be replaced by the CliMAF Reference Syntax expression
       describing the first input stream; can be useful for plot title
       or legend

     - **alias** : used if the script can make an on the fly re-scaling
       and renaming of a variable. Will be replaced by a string which
       pattern is : 'new_varname,file_varname,scale,offset'. The script
       should then transform on reading as new_varname =
       file_varname * scale + offset

     - **units, units_<digit>** : means that the script can set the units
       on-the-fly while reading one of the input streams

     - **missing** : means that the script can make an on-the-fly
       transformation of a given constant to missing values

     - **labels** : for script accepting ensembles, CliMAF will
       replace this keyword by a string bearing the labels
       associated with the ensemble, with delimiter $ as e.g. in:
       "CNRM-CM5 is fine$IPSL-CM5-LR is not bad$CCSM-29 is ..."

    """
    # NOTE(review): at a number of places below, Climaf_Operator_Error is
    # instantiated without being raised - presumably its constructor only
    # reports the issue; kept as is, to be confirmed against the class

    # Check that script name do not clash with an existing symbol
    if name in sys.modules['__main__'].__dict__ and name not in scripts:
        clogger.error("trying to define %s as an operator, "
                      "while it exists as smthing else" % name)
        return None
    if name in scripts:
        clogger.warning("Redefining CliMAF script %s" % name)
    #
    # Check now that script is executable
    scriptcommand = command.split(' ')[0].replace("(", "")
    ex = subprocess.Popen(['which', scriptcommand], stdout=subprocess.PIPE)
    # communicate() drains and closes the pipe, and waits for the process
    ex.communicate()
    if ex.returncode != 0:
        Climaf_Operator_Error("defining %s : command %s is not "
                              "executable" % (name, scriptcommand))
    #
    # Analyze inputs field keywords and populate dict
    # attribute 'inputs' with some properties
    self.inputs = dict()
    commuteWithEnsemble = True
    it = re.finditer(
        r"\${(?P<keyw>(?P<mult>mm)?in(?P<serie>s)?(_(?P<n>([\d]+)))?)}",
        command)
    for oc in it:
        if oc.group("n") is not None:
            rank = int(oc.group("n"))
        else:
            rank = 0
        if rank in self.inputs:
            Climaf_Operator_Error(
                "When defining %s : duplicate declaration for input #%d" %
                (name, rank))
        serie = (oc.group("serie") is not None)
        multiple = (oc.group("mult") is not None)
        if multiple:
            if rank != 0:
                raise Climaf_Operator_Error(
                    "Only first operand may accept members")
            if serie:
                raise Climaf_Operator_Error("Operand %s cannot both accept"
                                            "members and files set" %
                                            oc.group("keyw"))
            commuteWithEnsemble = False
        self.inputs[rank] = (oc.group("keyw"), multiple, serie)
    if len(self.inputs) == 0:
        Climaf_Operator_Error(
            "When defining %s : command %s must include at least one of "
            "${in} ${ins} ${mmin} or ${in_..} ... for specifying how CliMAF"
            " will provide the input filename(s)" % (name, command))
    for i in range(len(self.inputs)):
        if i + 1 not in self.inputs and not (i == 0 and 0 in self.inputs):
            Climaf_Operator_Error(
                "When defining %s : error in input sequence for rank %d" %
                (name, i + 1))
    #
    # Check if command includes an argument allowing for
    # providing an output filename
    if command.find("${out") < 0:
        # Compare strings by value, not by identity
        if format != "txt":
            format = None
    #
    # Search in call arguments for keywords matching "<output_name>_var"
    # which may provide format string for 'computing' outputs variable
    # name from input variable name
    outvarnames = dict()
    pattern = r"^(.*)_var$"
    for p in kwargs:
        if re.match(pattern, p):
            outvarnames[re.findall(pattern, p)[0]] = kwargs[p]
    clogger.debug("outvarnames for script %s = %s" %
                  (name, repr(outvarnames)))
    #
    # Analyze outputs names , associated variable names
    # (or format strings), and store it in attribute dict 'outputs'
    self.outputs = dict()
    it = re.finditer(r"\${out(_(?P<outname>[\w-]*))?}", command)
    for occ in it:
        outname = occ.group("outname")
        if outname is not None:
            if outname in outvarnames:
                self.outputs[outname] = outvarnames[outname]
            else:
                self.outputs[outname] = "%s"  # outname
        else:
            # The main output is recorded under both keys None and ''
            self.outputs[None] = outvarnames.get('', "%s")
            self.outputs[''] = outvarnames.get('', "%s")
    #
    # Deduce the script capabilities from the keywords used in the command
    canSelectVar = canSelectVar or (command.find("${var}") > 0)
    canAggregateTime = (command.find("${ins}") > 0
                        or command.find("${ins_1}") > 0)
    canAlias = (command.find("${alias}") > 0)
    canMissing = (command.find("${missing}") > 0)
    canSelectTime = False
    if command.find("${period}") > 0 or command.find("${period_1}") > 0:
        canSelectTime = True
    if command.find("${period_iso}") > 0 or command.find(
            "${period_iso_1}") > 0:
        canSelectTime = True
    canSelectDomain = (command.find("${domain}") > 0
                       or command.find("${domain_1}") > 0)
    #
    self.name = name
    self.command = command
    self.fixedfields = None
    self.flags = scriptFlags(canOpendap, canSelectVar, canSelectTime,
                             canSelectDomain, canAggregateTime, canAlias,
                             canMissing, commuteWithEnsemble,
                             commuteWithTimeConcatenation,
                             commuteWithSpaceConcatenation)
    if format in known_formats or format in graphic_formats or format in none_formats:
        self.outputFormat = format
    else:
        raise Climaf_Operator_Error(
            "Allowed formats yet are : 'object', 'nc', 'txt', %s" %
            ', '.join([repr(x) for x in graphic_formats]))
    scripts[name] = self

    # Init doc string for the operator
    doc = "CliMAF wrapper for command : %s" % self.command
    # try to get a better doc string from colocated doc/directory
    docfilename = os.path.dirname(
        __file__) + "/../doc/scripts/" + name + ".rst"
    try:
        with open(docfilename) as docfile:
            doc = docfile.read()
    except (IOError, OSError):
        # No readable doc file for this script : keep the default doc
        pass
    #
    # creates a function named as requested, which will invoke
    # capply with that name and same arguments
    defs = 'def %s(*args,**dic) :\n  """%s"""\n  return driver.capply("%s",*args,**dic)\n' \
           % (name, doc, name)
    exec(defs, globals())
    # Make the new function also available in the main namespace
    exec("from climaf.operators import %s" % name,
         sys.modules['__main__'].__dict__)
    clogger.debug("CliMAF script %s has been declared" % name)
def macro(name, cobj, lobjects=None):
    """
    Define a CliMAF macro from a CliMAF compound object.

    Transform a Climaf object in a macro, replacing all datasets,
    and the objects of lobjects, by a dummy argument.  Register it in
    dict cmacros, if name is not None

    Args:
     name (string) : the name you want to give to the macro; a Python
      function with the same name will be defined
     cobj (CliMAF object, or string) : any CliMAF object, usually
      the result of a series of operators, that you would like to
      repeat using other input datasets; alternatively, you can provide
      the macro formula as a string (when accustomed to the syntax)
     lobjects (list, optional): for expert use- a list of objects,
      which are sub-objects of cobject, and which should become arguments
      of the macro

    Returns:
      a macro; the returned value is usualy not used 'as is' : a
      python function is also defined in module cmacros and in main
      namespace, and you may use it in the same way as a CliMAF
      operator. All the datasets involved in ``cobj`` become arguments
      of the macro, which allows you to re-do the same computations
      and easily define objects similar to ``cobjs``; None is returned if
      ``cobj`` is None, is a string which cannot be interpreted, or is of
      a type which is not handled

    Example::

     >>> # First use and combine CliMAF operators to get some interesting result using some dataset(s)
     >>> january_ta=ds(project='example',simulation='AMIPV6ALB2G',variable='ta',frequency='monthly',period='198001')
     >>> ta_europe=llbox(january_ta,latmin=40,latmax=60,lonmin=-15,lonmax=25)
     >>> ta_ezm=ccdo(ta_europe,operator='zonmean')
     >>> fig_ezm=plot(ta_ezm)
     >>> #
     >>> # Using this result as an example, define a macro named 'eu_cross_section',
     >>> # which arguments will be the datasets involved in this result
     >>> cmacro('eu_cross_section',fig_ezm)
     >>> #
     >>> # You can of course apply a macro to another dataset(s) (even here to a 2D variable)
     >>> pr=ds(project='example',simulation='AMIPV6ALB2G', variable='pr', frequency='monthly', period='198001')
     >>> pr_ezm=eu_cross_section(pr)
     >>> #
     >>> # All macros are registered in dictionary climaf.cmacro.cmacros,
     >>> # which is imported by climaf.api; you can list it by :
     >>> cmacros

    Note : macros are automatically saved in file ~/.climaf.macros, and can be edited

    See also much more explanations in the example at :download:`macro.py <../examples/macro.py>`
    """
    if lobjects is None:
        lobjects = []
    if isinstance(cobj, str):
        s = cobj
        # Next line used for interpreting macros's CRS
        exec("from climaf.cmacro import cdummy; ARG=cdummy()",
             sys.modules['__main__'].__dict__)
        try:
            cobj = eval(cobj, sys.modules['__main__'].__dict__)
        except Exception:
            # usually case of a CRS which project is not currently defined
            clogger.error(
                "Cannot interpret %s with the projects currently define" % s)
            return None
    # Is cobj one of the objects which must become an argument ?
    domatch = False
    for o in lobjects:
        if cobj == o or (isinstance(cobj, cobject)
                         and cobj.buildcrs() == o.buildcrs()):
            domatch = True
            break
    if isinstance(cobj, cdataset) or isinstance(cobj, cdummy) or domatch:
        return cdummy()
    # lobjects is handed over to each recursive call : its entries are
    # sub-objects of cobj, which can only be matched further down the tree
    if isinstance(cobj, ctree):
        rep = ctree(cobj.operator, cobj.script, *cobj.operands,
                    **cobj.parameters)
        rep.operands = [macro(None, operand, lobjects)
                        for operand in rep.operands]
    elif isinstance(cobj, scriptChild):
        rep = scriptChild(macro(None, cobj.father, lobjects), cobj.varname)
    elif isinstance(cobj, cpage):
        rep = cpage([[macro(None, fig, lobjects) for fig in line]
                     for line in cobj.fig_lines], cobj.widths, cobj.heights)
    elif isinstance(cobj, cens):
        members = [macro(None, member, lobjects) for member in cobj.values()]
        rep = cens(dict(zip(cobj.keys(), members)))
    elif cobj is None:
        return None
    else:
        clogger.error("Cannot yet handle object :%s", repr(cobj))
        rep = None
    if name and rep:
        cmacros[name] = rep
        doc = "A CliMAF macro, which text is " + repr(rep)
        defs = 'def %s(*args) :\n  """%s"""\n  return instantiate(cmacros["%s"],[ x for x in args])\n' \
               % (name, doc, name)
        exec(defs, globals())
        # Make the new function also available in the main namespace
        exec("from climaf.cmacro import %s" % name,
             sys.modules['__main__'].__dict__)
        clogger.debug("Macro %s has been declared" % name)
    return rep
def _vertical_axis_name(ncfile):
    """Return the name of the last variable of NCFILE which name looks like
    the one of a vertical axis, or None if there is no such variable."""
    levname = None
    for varname in ncfile.variables:
        lowered = varname.lower()
        if lowered in ['level', 'levels', 'lev', 'levs', 'depth', 'deptht',
                       'olevel'] or 'plev' in lowered:
            levname = varname
    return levname


def _is_hpa_unit(units):
    """Tell if string UNITS is one of the known names for hPa."""
    return units.lower() in ['hpa', 'millibar', 'mbar', 'hectopascal']


def _convert_plev_hPa_to_Pa(dat, levname, cachedir):
    """(Re)declare operator 'convert_plev_hPa_to_Pa' for vertical axis
    LEVNAME, using a temporary file in CACHEDIR, and return the object for
    its application to DAT."""
    tmpfile = cachedir + '/convert_to_Pa_tmp.nc'
    # Multiply the axis values by 100, then change the axis units
    cscript('convert_plev_hPa_to_Pa',
            'ncap2 -As "' + levname + '=' + levname + '*100" ${in} ' + tmpfile +
            ' ; ncatted -O -a units,' + levname + ',o,c,Pa ' + tmpfile +
            ' ; mv ' + tmpfile + ' ${out}')
    return climaf.operators.convert_plev_hPa_to_Pa(dat)


def lonlatvert_interpolation(dat1, dat2=None, vertical_levels=None,
                             cdo_horizontal_grid='r1x90',
                             horizontal_regridding=True):
    """
    Interpolates a lon/lat/pres field dat1 via two possible ways:

    - either by providing a target lon/lat/pres field dat2 => dat1 is
      regridded both horizontally and vertically on dat2
    - or by providing a list of vertical levels => dat1 is regridded
      horizontally on the cdo_horizontal_grid (default='r1x90'), and
      vertically on the list of vertical levels

    The user can provide the vertical levels (in Pa) like this:
    vertical_levels=[100000,85000,50000,20000,...] # or
    vertical_levels='100000,85000,50000,20000'

    If horizontal_regridding is False, only the vertical interpolation is
    applied.

    Before the computations, the function checks the unit of the vertical
    axis; if it is one of the names known for hPa, the data is first
    converted to Pa using operator 'convert_plev_hPa_to_Pa'.

    Returns the interpolated object, or None (after logging an error) if no
    vertical axis is found in the data, or if neither dat2 nor
    vertical_levels is provided.

       >>> dat = ds(project='CMIP5',model='IPSL-CM5A-LR',variable='ua',period='1980-1985',
                    experiment='historical',table='Amon')
       >>> ref = ds(project='ref_pcmdi',variable='ua',product='ERAINT')

       >>> zonmean_dat = zonmean(time_average(dat))
       >>> zonmean_ref = zonmean(time_average(ref))

       >>> dat_interpolated_on_ref = lonlatvert_interpolation(zonmean_dat,zonmean_ref)
       >>> dat_interpolated_on_list_of_levels = lonlatvert_interpolation(zonmean_dat,vertical_levels='100000,85000,50000,20000,10000,5000,2000,1000')
    """
    from climaf.anynetcdf import ncf
    from climaf import cachedir

    file1 = cfile(dat1)
    clogger.debug('file1 = %s' % file1)
    ncfile1 = ncf(file1)

    # -- First, we check the unit of the vertical dimension of file1
    levname1 = _vertical_axis_name(ncfile1)
    if not levname1:
        # Going on would only fail when looking for the axis units
        clogger.error('Name of the vertical axis not found for dat1')
        return None
    levunits1 = ncfile1.variables[levname1].units
    if _is_hpa_unit(levunits1):
        dat1 = _convert_plev_hPa_to_Pa(dat1, levname1, cachedir)
    # -> The vertical axis of dat1 is now set to Pa
    #
    if dat2:
        # -- Second, we check the unit of the vertical dimension of file2
        file2 = cfile(dat2)
        clogger.debug('file2 = %s' % file2)
        ncfile2 = ncf(file2)

        levname2 = _vertical_axis_name(ncfile2)
        clogger.debug('levname2 = %s' % levname2)
        if not levname2:
            clogger.error('Name of the vertical axis not found for dat2')
            return None
        levunits2 = ncfile2.variables[levname2].units
        clogger.debug('ncfile2 = %s' % ncfile2)
        # The way to get the values depends on the type of object 'ncf'
        # actually returns
        try:
            levValues2 = ncfile2.variables[levname2].getValue()
        except Exception:
            try:
                levValues2 = ncfile2.variables[levname2].data
            except Exception:
                levValues2 = ncfile2[levname2][0:len(ncfile2[levname2])]
        if _is_hpa_unit(levunits2):
            dat2 = _convert_plev_hPa_to_Pa(dat2, levname2, cachedir)
            # -> The vertical axis of dat2 is now set to Pa
            scale = 100.0
        else:
            scale = 1.0
        #
        # --> We get the values of the vertical levels of dat2 (from the
        # original file, that's why we apply a scale)
        levels = ''.join([',' + str(lev * scale) for lev in levValues2])
        #
        # --> We can now interpolate dat1 on dat2 verticaly and horizontally
        if horizontal_regridding:
            return ccdo(regrid(dat1, dat2, option='remapdis'),
                        operator='intlevel' + levels)
        return ccdo(dat1, operator='intlevel' + levels)
    #
    if not vertical_levels:
        clogger.error('--> Provide a list of vertical levels with vertical_levels')
        return None
    if isinstance(vertical_levels, (list, tuple)):
        levels = ''.join([',' + str(lev) for lev in vertical_levels])
    else:
        levels = ',' + vertical_levels
    if horizontal_regridding:
        return ccdo(regridn(dat1, cdogrid=cdo_horizontal_grid),
                    operator='intlevel' + levels)
    return ccdo(dat1, operator='intlevel' + levels)
def register(filename, crs, outfilename=None):
    """
    Adds in FILE a metadata named 'CRS_def' and with value CRS, and a
    metadata 'CLiMAF' with CliMAF version and ref URL

    Records this FILE in dict crs2filename

    If OUTFILENAME is not None, FILENAME is a temporary file and
    it's OUTFILENAME which is recorded in dict crs2filename

    If ``stamping`` is false, the file is recorded without any check nor
    stamping. Otherwise, the stamping command depends on the filename
    suffix : '.nc', '.png', '.pdf' or '.eps'.

    Returns:
      True if the file was recorded; None (after logging an error) if the
      file does not exist or has another suffix. If the stamping command or
      the move to OUTFILENAME fails, a critical message is logged and
      ``exit()`` is called.
    """
    # First read index from file if it is yet empty - No : done at startup
    # if len(crs2filename.keys()) == 0 : cload()
    global dropped_crs
    if not stamping:
        clogger.debug('No stamping')
        crs2filename[crs] = filename
        return True
    # It appears that we have to let some time to the file system for
    # updating its inode tables
    waited = 0
    while waited < 50 and not os.path.exists(filename):
        time.sleep(0.1)
        waited += 1
    if not os.path.exists(filename):
        clogger.error("file %s does not exist (for crs %s)" % (filename, crs))
        return None
    #
    # Build the stamping command which fits the file format
    if filename.endswith(".nc"):
        command = "ncatted -h -a CRS_def,global,o,c,\"%s\" -a CliMAF,global,o,c,\"CLImate Model Assessment " \
                  "Framework version %s (http://climaf.rtfd.org)\" %s" % (crs, version, filename)
    elif filename.endswith(".png"):
        crs2 = crs.replace("%", "\\%")
        command = "convert -set \"CRS_def\" \"%s\" -set \"CliMAF\" \"CLImate Model Assessment Framework version " \
                  "%s (http://climaf.rtfd.org)\" %s %s.png && mv -f %s.png %s" % \
                  (crs2, version, filename, filename, filename, filename)
    elif filename.endswith(".pdf"):
        tmpfile = str(uuid.uuid4())
        command = "pdftk %s dump_data output %s && echo -e \"InfoBegin\nInfoKey: Keywords\nInfoValue: %s\" >> %s " \
                  "&& pdftk %s update_info %s output %s.pdf && mv -f %s.pdf %s && rm -f %s" % \
                  (filename, tmpfile, crs, tmpfile, filename, tmpfile, filename, filename, filename, tmpfile)
    elif filename.endswith(".eps"):
        command = 'exiv2 -M"add Xmp.dc.CliMAF CLImate Model Assessment Framework version %s ' \
                  '(http://climaf.rtfd.org)" -M"add Xmp.dc.CRS_def %s" %s' % \
                  (version, crs, filename)
    else:
        # There is no stamping command for this kind of file
        clogger.error("cannot stamp file %s (for crs %s) : unhandled file format" % (filename, crs))
        return None
    # NOTE(review): the commands are run through a shell, with CRS and
    # filenames included as is; this assumes they hold no character which
    # breaks the quoting
    clogger.debug("trying stamping by %s" % command)
    if os.system(command) != 0:
        clogger.critical("cannot stamp by %s" % command)
        exit()
        return None
    registered = filename
    if outfilename:
        cmd = 'mv -f %s %s ' % (filename, outfilename)
        if os.system(cmd) != 0:
            clogger.critical("cannot move by %s" % cmd)
            exit()
            return None
        clogger.info("move %s as %s " % (filename, outfilename))
        registered = outfilename
    clogger.info("%s registered as %s" % (crs, registered))
    crs2filename[crs] = registered
    if crs in dropped_crs:
        dropped_crs.remove(crs)
    return True
def selectGenericFiles(urls, **kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned
    by this function is composed of files which :

    - match the patterns in ``urls`` once these patterns are instantiated by
      the values in kwargs, and
    - contain the ``variable`` provided in kwargs
    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory. However, for remote files,
    filename pattern must include ${varname}, which is instantiated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this is
    for the sake of efficiency (please complain if inadequate)

    Example :

    >>> selectGenericFiles(project='my_project', model='my_model', simulation='lastexp',
    ...                    variable='tas', period='1980',
    ...                    urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc'])
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and
      filenames do include the variable name, as this speed up the search

    - YYYY, YYYYMM, YYYYMMDD, YYYYMMDDHH : use it for indicating the start date of
      the period covered by each file, if this is applicable in the
      file naming; use a second time for end date, if applicable
      (otherwise the assumption is that the whole year -resp. month or
      day- is included in the file

    - wildcards '?' and '*' for matching respectively one and any number of characters

    A pattern which includes a ':' is considered as describing remote data :
    what precedes the last ':' is the remote prefix, which is prepended to
    the names of the selected files.

    The mandatory entries in kwargs are ``period`` (a string, or an already
    built period object) and ``variable``. Entries ``filenameVar`` and
    ``frequency`` are used if present.

    Returns the list of files selected using the first pattern in ``urls``
    which selects any file (an empty list if there is none).

    Raises Climaf_Data_Error if a start date cannot be extracted from the name
    of a file, or if a remote file cannot be associated to the variable on the
    basis of its name.
    """
    rep = []
    period = kwargs['period']
    if isinstance(period, str):
        period = init_period(period)
    variable = kwargs['variable']
    # An empty or None filenameVar is the same as no filenameVar
    altvar = kwargs.get('filenameVar') or variable
    # Frequencies for which there is no date in filenames
    fixed_frequencies = ("fx", "seasonnal", "annual_cycle")
    #
    # a dict and an ordered list (longest keyword first) of date globbing patterns
    dt = dict(YYYY="????", YYYYMM="??????", YYYYMMDD="????????", YYYYMMDDHH="??????????")
    lkeys = sorted(dt, reverse=True)
    # a dict and an ordered list (longest keyword first) for matching dates
    dr = dict(YYYY="([0-9]{4})", YYYYMM="([0-9]{6})", YYYYMMDD="([0-9]{8})", YYYYMMDDHH="([0-9]{10})")
    rkeys = sorted(dr, reverse=True)

    def date_glob(pattern):
        # Replace date keywords by globbing wildcards
        for k in lkeys:
            pattern = pattern.replace(k, dt[k])
        return pattern

    def list_files(prefix, pattern):
        # Sorted list of files matching the globbing pattern, either remote or local
        if prefix:
            return sorted(glob_remote_data(prefix, pattern))
        return sorted(glob.glob(pattern))

    #
    for url in urls:
        # Split the remote prefix (if any, and including its trailing ':')
        # from the file pattern
        head, sep, raw_pattern = url.rpartition(":")
        remote_prefix = head + sep
        # Instantiate keywords in pattern with attributes values
        template = Template(raw_pattern).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = date_glob(template)
        lfiles = list_files(remote_prefix, temp2)
        if remote_prefix:
            clogger.debug("Remote globbing %d files for varname on %s : " % (len(lfiles), remote_prefix + temp2))
        else:
            clogger.debug("Globbing %d files for varname on %s : " % (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if not lfiles and kwargs.get('filenameVar'):
            # Change value of facet 'variable', on a copy, in order that next
            # urls are first tried with the actual variable name
            alt_kwargs = dict(kwargs)
            alt_kwargs['variable'] = kwargs['filenameVar']
            template = Template(raw_pattern).safe_substitute(**alt_kwargs)
            temp2 = date_glob(template)
            lfiles = list_files(remote_prefix, temp2)
            clogger.debug("Globbing %d files for filenamevar on %s: " % (len(lfiles), remote_prefix + temp2))
        #
        # Construct regexp for extracting dates from filename : first
        # occurrence of the date keyword is for start date, second one (if
        # any) is for end date
        regexp = None
        regexp0 = None
        hasEnd = False
        for key in rkeys:
            if key in template:
                regexp = template.replace(key, dr[key], 1)
                if key in regexp:
                    hasEnd = True
                    regexp = regexp.replace(key, dr[key], 1)
                break
        if regexp:
            # Translate globbing wildcards to regexp syntax
            regexp0 = regexp.replace("*", ".*").replace("?", r".")
        #
        for f in lfiles:
            #
            # Analyze file time period
            fperiod = None
            if regexp:
                start = re.sub(regexp0, r'\1', f)
                if start == f:
                    raise Climaf_Data_Error("Start period not found")
                if hasEnd:
                    end = re.sub(regexp0, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
            elif kwargs.get('frequency') in fixed_frequencies:
                # Fixed fields : no date in filename, filter on variable only
                if not remote_prefix:
                    # local data : can look for the variable in the file
                    if ("${variable}" in url) or variable == '*' or \
                            fileHasVar(f, variable) or (variable != altvar and fileHasVar(f, altvar)):
                        clogger.debug("adding fixed field :" + f)
                        rep.append(f)
                else:
                    # remote data : can only rely on the filename
                    if ("${variable}" in raw_pattern) or variable == '*' or \
                            (variable != altvar and (f.find(altvar) >= 0)):
                        clogger.debug("adding fixed field :" + remote_prefix + f)
                        rep.append(remote_prefix + f)
                    else:
                        raise Climaf_Data_Error(
                            "For remote files, filename pattern (%s) should include ${varname} (which is instanciated by variable name or filenameVar)" % f)
            else:
                # NOTE(review): f is recorded without remote prefix, even for
                # remote data - confirm that this is intended
                clogger.info("Cannot yet filter files re. time using only file content.")
                rep.append(f)
            #
            # Filter file time period against required period
            if (fperiod and period.intersects(fperiod)) or not regexp:
                clogger.debug('Period is OK - Considering variable filtering on %s and %s for %s' %
                              (variable, altvar, f))
                # Filter against variable
                if "${variable}" in url:
                    # Avoid recording twice a file already selected above
                    if remote_prefix + f not in rep:
                        clogger.debug('appending %s based on variable in filename' % f)
                        rep.append(remote_prefix + f)
                elif f not in rep:
                    if not remote_prefix:
                        # local data
                        if variable == '*' or "," in variable or fileHasVar(f, variable) or \
                                (altvar != variable and fileHasVar(f, altvar)):
                            # Should check time period in the file if not regexp
                            clogger.debug('appending %s based on multi-var or var exists in file ' % f)
                            rep.append(f)
                    else:
                        # remote data
                        if variable == '*' or "," in variable or \
                                (variable != altvar and (f.find(altvar) >= 0)):
                            # Should check time period in the file if not regexp
                            clogger.debug('appending %s based on multi-var or altvar ' % (remote_prefix + f))
                            rep.append(remote_prefix + f)
                        else:
                            mess = "For remote files, filename pattern (%s) should include" % (remote_prefix + f)
                            mess += " ${varname} (which is instanciated by variable name or filenameVar)"
                            raise Climaf_Data_Error(mess)
            elif not fperiod:
                clogger.debug('not appending %s because period is None ' % f)
            else:
                clogger.debug('not appending %s because period doesn t intersect %s' % (f, period))
        #
        # Break on first url with any matching data
        if rep:
            clogger.debug('url %s does match for ' % url + repr(kwargs))
            break
    return rep