Esempio n. 1
0
def ds(*args,**kwargs) :
    """
    Returns a dataset from its full Climate Reference Syntax string. Example ::

     >>> ds('CMIP5.historical.pr.[1980].global.monthly.CNRM-CM5.r1i1p1.mon.Amon.atmos.last')

    Also a shortcut for :py:meth:`~climaf.classes.cdataset`,
    when used with only keyword arguments. Example ::

     >>> cdataset(project='CMIP5', model='CNRM-CM5', experiment='historical', frequency='monthly',\
              simulation='r2i3p9', domain=[40,60,-10,20], variable='tas', period='1980-1989', version='last')

    Raises Climaf_Classes_Error (unless allow_errors_on_ds_call is set, in
    which case it only logs) when the CRS string is ambiguous among the
    declared projects, or matches none of them.
    """
    if len(args) > 1 :
        raise Climaf_Classes_Error("Must provide either only a string or only keyword arguments")
    # Keyword-only call : plain front-end to cdataset
    if len(args) == 0 :
        return cdataset(**kwargs)
    crs = args[0]
    # Try to interpret the CRS string against every declared project
    results = []
    for cproj in cprojects :
        try :
            dataset = cprojects[cproj].crs2ds(crs)
        except Climaf_Classes_Error :
            dataset = None
        if dataset :
            results.append(dataset)
    if len(results) > 1 :
        # More than one project can parse the string : ambiguous
        # (repr() instead of the deprecated backquote syntax)
        e = "CRS expression %s is ambiguous among projects %s" % (crs, repr(cprojects.keys()))
        if allow_errors_on_ds_call : clogger.info(e)
        else : raise Climaf_Classes_Error(e)
    elif len(results) == 0 :
        e = "CRS expression %s is not valid for any project in %s" % (crs, repr(cprojects.keys()))
        if allow_errors_on_ds_call : clogger.info(e)
        else : raise Climaf_Classes_Error(e)
    else :
        rep = results[0]
        # Special case : the 'file' pseudo-project carries the data path directly
        if rep.project == 'file' :
            rep.files = rep.kvp["path"]
        return rep
Esempio n. 2
0
def register(filename,crs):
    """
    Adds in file FILENAME a metadata named 'CRS_def' with value CRS, and a
    metadata 'CLiMAF' with CliMAF version and ref URL

    Records this file in dict crs2filename

    Returns True when stamping succeeded, None otherwise (non-existing file,
    unsupported file type, or stamping command failure)
    """
    # It appears that we have to let some time to the file system for
    # updating its inode tables
    waited=0
    while waited < 20 and not os.path.exists(filename) :
        time.sleep(0.1)
        waited += 1
    if not os.path.exists(filename) :
        clogger.error("file %s does not exist (for crs %s)"%(filename,crs))
        return None
    # Choose the stamping command according to the file type. The original
    # code left 'command' unbound for other types (NameError), and used
    # unescaped '.' in the regexps, which matches any character.
    command=None
    if re.search(r"\.nc$",filename) :
        command="ncatted -h -a CRS_def,global,o,c,\"%s\" -a CliMAF,global,o,c,\"CLImate Model Assessment Framework version %s (http://climaf.rtfd.org)\" %s"%\
            (crs,version,filename)
    elif re.search(r"\.png$",filename) :
        command="convert -set \"CRS_def\" \"%s\" -set \"CliMAF\" \"CLImate Model Assessment Framework version %s (http://climaf.rtfd.org)\" %s %s.png && mv -f %s.png %s"%\
            (crs,version,filename,filename,filename,filename)
    if command is None :
        clogger.error("cannot stamp %s : unsupported file type (for crs %s)"%(filename,crs))
        return None
    clogger.debug("trying stamping by %s"%command)
    if os.system(command) == 0 :
        crs2filename[crs]=filename
        clogger.info("%s registered as %s"%(crs,filename))
        return True
    else :
        clogger.critical("cannot stamp by %s"%command)
        return None
Esempio n. 3
0
def register(filename,crs):
    """ Adds in file FILENAME a metadata named CRS_def with value CRS.
    Records this file in dict crs2filename

    Returns True when stamping succeeded, None otherwise (non-existing file,
    unsupported file type, or stamping command failure)
    """
    # First read index from file if it is yet empty
    if len(crs2filename) == 0 : cload()
    # It appears that we have to allow the file system some time for updating its inode tables
    waited=0
    while waited < 10 and not os.path.exists(filename) :
        time.sleep(0.5)
        waited += 1
    time.sleep(0.5)
    if not os.path.exists(filename) :
        clogger.error("file %s does not exist (for crs %s)"%(filename,crs))
        return None
    # Choose the stamping command according to the file type. The original
    # code left 'command' unbound for other types (NameError), and used
    # unescaped '.' in the regexps, which matches any character.
    command=None
    if re.search(r"\.nc$",filename) :
        command="ncatted -h -a CRS_def,global,o,c,\"%s\" %s"%(crs,filename)
    elif re.search(r"\.png$",filename) :
        command="convert -set \"CRS_def\" \"%s\" %s %s.png && mv -f %s.png %s"%\
            (crs,filename,filename,filename,filename)
    if command is None :
        clogger.error("cannot stamp %s : unsupported file type (for crs %s)"%(filename,crs))
        return None
    clogger.debug("trying stamping by %s"%command)
    if os.system(command) == 0 :
        crs2filename[crs]=filename
        clogger.info("%s registered as %s"%(crs,filename))
        return True
    else :
        clogger.critical("cannot stamp by %s"%command)
        return None
Esempio n. 4
0
def selectEmFiles(**kwargs):
    """
    Returns the list of EM data files matching the 'simulation', 'frequency',
    'variable', 'period' and 'realm' keyword arguments.

    Realm '*' means : scan all single-letter realms (A, L, O, I).
    """
    # Pour A et L : mon, day1, day2, 6hLev, 6hPlev, 3h
    simulation = kwargs['simulation']
    frequency = kwargs['frequency']
    variable = kwargs['variable']
    period = kwargs['period']
    realm = kwargs['realm']
    #
    # Map frequency to the suffix used in EM directory variable names
    freqs = {"mon": "", "3h": "_3h"}
    f = frequency
    if f in freqs:
        f = freqs[f]
    rep = []
    # Must look for all realms, here identified by a single letter
    if realm == "*":
        lrealm = ["A", "L", "O", "I"]
    else:
        lrealm = [realm]
    # Guard against unset EM_HOME, which would crash expanduser(None) below
    em_home = os.getenv("EM_HOME")
    if em_home is None:
        clogger.error("Environment variable EM_HOME is not set")
        return rep
    for realm in lrealm:
        clogger.debug("Looking for realm " + realm)
        # Use EM data for finding data dir
        freq_for_em = f
        if realm == 'I':
            freq_for_em = ""  # This is a special case ...
        command = [
            "grep", "^export EM_DIRECTORY_" + realm + freq_for_em + "=",
            os.path.expanduser(em_home) + "/expe_" + simulation
        ]
        try:
            ex = subprocess.Popen(command,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
        except OSError:  # e.g. 'grep' not available (was a bare except)
            clogger.error("Issue getting archive_location for " + simulation +
                          " for realm " + realm + " with: " + repr(command))
            break
        if ex.wait() == 0:
            # Extract the value part of 'export EM_DIRECTORY_...="..."'
            # ('data_dir' : avoid shadowing the builtin 'dir')
            data_dir = ex.stdout.read().split("=")[1].replace('"',
                                                              "").replace("\n", "")
            clogger.debug("Looking at dir " + data_dir)
            if os.path.exists(data_dir):
                for fil in os.listdir(data_dir):
                    # Keep only files whose period intersects the requested
                    # one and which contain the requested variable
                    fileperiod = periodOfEmFile(fil, realm, f)
                    if fileperiod and period.intersects(fileperiod):
                        if fileHasVar(data_dir + "/" + fil, variable):
                            rep.append(data_dir + "/" + fil)
            else:
                clogger.error(
                    "Directory %s does not exist for simulation %s, realm %s "
                    "and frequency %s" % (data_dir, simulation, realm, f))
        else:
            clogger.info("No archive location found for " + simulation +
                         " for realm " + realm + " with: " + repr(command))
    return rep
Esempio n. 5
0
def csync(update=False):
    """
    Merges current in-memory cache index and current on-file cache index
    for updating both

    If arg `update` is True, additionally ensures consistency between files
    set and index content, either :

    - if cache.stamping is true, by reading CRS in all files
    - else, by removing files which are not in the index; this may erase
      result files which have been computed by another running
      instance of CliMAF
    """
    #
    import pickle
    global cacheIndexFileName
    global dropped_crs

    # Merge index on file and index in memory, discarding entries
    # dropped during this session
    file_index = cload(True)
    for crs in dropped_crs:
        file_index.pop(crs, None)
    crs2filename.update(file_index)

    # check if cache index is up to date; if not enforce consistency
    files_in_cache = []
    if update:
        clogger.info("Listing crs from files present in cache")
        # sorted() works both on lists and on py3 dict views, unlike the
        # original in-place .sort() on .values()
        files_in_cache = sorted(list_cache())
        files_in_index = sorted(crs2filename.values())
        if files_in_index != files_in_cache:
            if stamping:
                clogger.info("Rebuilding cache index from file content")
                rebuild()
            else:
                clogger.warning(
                    'In no stamp mode, there is no way to seriously identify CRS from files in cache  !'
                )
                # Removing unknown cache files here would be an issue in
                # concurrent mode, hence nothing is erased
    # Save index to disk
    fn = os.path.expanduser(cacheIndexFileName)
    try:
        with open(fn, "w") as cacheIndexFile:
            pickle.dump(crs2filename, cacheIndexFile)
        dropped_crs = []
    except Exception:
        if update:
            # Original tested 'len(files_in_cache > 0)', i.e. compared the
            # list itself to an int; the intent is 'len(files_in_cache) > 0'
            if os.path.isfile(fn) and len(files_in_cache) > 0:
                clogger.error("Issue when writing cache index %s" % fn)
Esempio n. 6
0
def cload(alt=None):
    """
    Loads the cache index from file cacheIndexFileName.

    If `alt` is true, returns the loaded index as a fresh dict and leaves the
    global index crs2filename untouched; otherwise replaces crs2filename,
    which is only legitimate while it is still empty.
    """
    global crs2filename
    global crs_not_yet_evaluable
    rep = dict()

    if len(crs2filename) != 0 and not alt:
        # NOTE(review): this error object is instantiated but never raised;
        # presumably the error class logs in its constructor — confirm
        Climaf_Cache_Error(
            "attempt to reset file index - would lead to inconsistency !")
    try:
        # open() replaces the py2-only file() builtin; 'with' guarantees the
        # handle is closed even when unpickling fails
        with open(os.path.expanduser(cacheIndexFileName), "r") as cacheIndexFile:
            if alt:
                rep = pickle.load(cacheIndexFile)
            else:
                crs2filename = pickle.load(cacheIndexFile)
    except Exception:
        # No index file yet (e.g. first run), or unreadable one : start empty
        pass
    #
    # Disabled consistency pass over the loaded entries
    must_check_index_entries = False
    if must_check_index_entries:
        # We may have some crs inherited from past sessions and for which
        # some operator may have become non-standard, or some projects are yet
        # undeclared
        crs_not_yet_evaluable = dict()
        allow_error_on_ds()
        # Iterate over a copy since entries may be popped along the way
        for crs in crs2filename.copy():
            try:
                eval(crs, sys.modules['__main__'].__dict__)
            except Exception:
                print("Inconsistent cache object is skipped : %s" % crs)
                p = guess_projects(crs)
                if p not in crs_not_yet_evaluable:
                    crs_not_yet_evaluable[p] = dict()
                crs_not_yet_evaluable[p][crs] = crs2filename[crs]
                crs2filename.pop(crs)
                # Analyze projects of inconsistent cache objects
                projects = crs_not_yet_evaluable.keys()
                if projects:
                    clogger.info(
                        "The cache has %d objects for non-declared projects %s.\n"
                        "For using it, consider including relevant project(s) "
                        "declaration(s) in ~/.climaf and restarting CliMAF.\n"
                        "You can also declare these projects right now and call 'csync(True)'\n"
                        "Or you can erase corresponding data by 'crm(pattern=...project name...)'"
                        % (len(crs_not_yet_evaluable), repr(list(projects))))
        allow_error_on_ds(False)
    if alt:
        return rep
Esempio n. 7
0
def cdrop(obj, rm=True):
    """
    Deletes the cached file for a CliMAF object, if it exists

    Args:
     obj (cobject or string) : object to delete, or its string representation (CRS)

     rm (bool) : for advanced use only; should we actually delete (rm) the file, or just forget it in CliMAF cache index

    Returns:
     None if object does not exists, False if failing to delete, True if OK

    Example ::

    >>> dg=ds(project='example', simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
    >>> f=cfile(dg)
    >>> os.system('ls -al '+f)
    >>> cdrop(dg)

    """
    global crs2filename

    # Build the CRS string which indexes the object in the cache
    if isinstance(obj, cobject):
        crs = repr(obj)  # repr() instead of the deprecated backquote syntax
        if isinstance(obj, cdataset):
            crs = "select(" + crs + ")"
    elif type(obj) is str:
        crs = obj
    else:
        clogger.error("%s is not a CliMAF object" % repr(obj))
        return
    if crs not in crs2filename:
        clogger.info("%s is not cached" % crs)
        return None
    clogger.info("discarding cached value for " + crs)
    fil = crs2filename.pop(crs)
    if not rm:
        # Forget-only mode : index entry dropped, file kept. Report success
        # (the original fell through and returned None, contradicting the
        # documented 'True if OK')
        return True
    try:
        path_file = os.path.dirname(fil)
        os.remove(fil)
        try:
            # Also remove the containing directory when it became empty
            os.rmdir(path_file)
        except OSError as ex:
            clogger.warning(ex)
        return True
    except OSError:
        clogger.warning(
            "When trying to remove %s : file does not exist in cache" %
            crs)
        return False
Esempio n. 8
0
def selectEmFiles(**kwargs) :
    """
    Returns the list of EM data files matching the 'simulation', 'frequency',
    'variable', 'period' and 'realm' keyword arguments.

    Realm '*' means : scan all single-letter realms (A, L, O, I).
    """
    #Pour A et L : mon, day1, day2, 6hLev, 6hPlev, 3h
    simulation=kwargs['simulation']
    frequency=kwargs['frequency']
    variable=kwargs['variable']
    period=kwargs['period']
    realm=kwargs['realm']
    #
    # Map frequency to the suffix used in EM directory variable names
    freqs={ "mon" : "" , "3h" : "_3h"}
    f=frequency
    if f in freqs : f=freqs[f]
    rep=[]
    # Must look for all realms, here identified by a single letter
    if realm=="*" : lrealm= ["A", "L", "O", "I" ]
    else: lrealm=[ realm ]
    # Guard against unset EM_HOME, which would crash expanduser(None) below
    em_home=os.getenv("EM_HOME")
    if em_home is None :
        clogger.error("Environment variable EM_HOME is not set")
        return rep
    for realm in lrealm :
        clogger.debug("Looking for realm "+realm)
        # Use EM data for finding data dir
        freq_for_em=f
        if realm == 'I' : freq_for_em=""  # This is a special case ...
        command=["grep", "^export EM_DIRECTORY_"+realm+freq_for_em+"=",
                 os.path.expanduser(em_home)+"/expe_"+simulation ]
        try :
            ex = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError :  # e.g. 'grep' not available (was a bare except)
            # repr() instead of the deprecated backquote syntax
            clogger.error("Issue getting archive_location for "+
                          simulation+" for realm "+realm+" with: "+repr(command))
            break
        if ex.wait()==0 :
            # Extract the value part of 'export EM_DIRECTORY_...="..."'
            # ('data_dir' : avoid shadowing the builtin 'dir')
            data_dir=ex.stdout.read().split("=")[1].replace('"',"").replace("\n","")
            clogger.debug("Looking at dir "+data_dir)
            if os.path.exists(data_dir) :
                for fil in os.listdir(data_dir) :
                    # Keep only files whose period intersects the requested
                    # one and which contain the requested variable
                    fileperiod=periodOfEmFile(fil,realm,f)
                    if fileperiod and period.intersects(fileperiod) :
                        if fileHasVar(data_dir+"/"+fil,variable) :
                            rep.append(data_dir+"/"+fil)
            else : clogger.error("Directory %s does not exist for EM simulation %s, realm %s "
                                 "and frequency %s"%(data_dir,simulation,realm,f))
        else :
            clogger.info("No archive location found for "+
                          simulation+" for realm "+realm+" with: "+repr(command))
    return rep
Esempio n. 9
0
def cload() :
    """
    Loads the cache index from file cacheIndexFileName into the global dict
    crs2filename (only legitimate while that dict is still empty).

    When must_check_index_entries is set (currently hard-wired to False),
    also evaluates each indexed CRS and moves the non-evaluable entries into
    crs_not_yet_evaluable, keyed by their guessed project.
    """
    global crs2filename 
    global crs_not_yet_evaluable

    if len(crs2filename) != 0 :
        # NOTE(review): this error object is created but never raised;
        # presumably the error class logs in its constructor — confirm
        Climaf_Driver_Error(
            "attempt to reset file index - would lead to inconsistency !")
    try :
        # The index is stored as a pickle (file() is the py2-only builtin)
        cacheIndexFile=file(os.path.expanduser(cacheIndexFileName), "r")
        crs2filename=pickle.load(cacheIndexFile)
        cacheIndexFile.close()
    except:
        # Missing or unreadable index file : silently start with empty index
        pass
        #clogger.debug("no index file yet")
    #
    # Disabled consistency pass over the loaded entries
    must_check_index_entries=False
    if (must_check_index_entries) :
        # We may have some crs inherited from past sessions and for which
        # some operator may have become non-standard, or some projects are yet 
        # undeclared
        crs_not_yet_evaluable=dict()
        allow_error_on_ds()
        # Iterate over a copy, since bad entries are popped along the way
        for crs in crs2filename.copy() :
            try :
                #print "evaluating crs="+crs
                eval(crs, sys.modules['__main__'].__dict__)
            except:
                print ("Inconsistent cache object is skipped : %s"%crs)
                #clogger.debug("Inconsistent cache object is skipped : %s"%crs)
                p=guess_projects(crs)
                if p not in crs_not_yet_evaluable : crs_not_yet_evaluable[p]=dict()
                crs_not_yet_evaluable[p][crs]=crs2filename[crs]
                crs2filename.pop(crs)
                # Analyze projects of inconsistent cache objects
                projects= crs_not_yet_evaluable.keys()
                if projects :
                    clogger.info( 
                        "The cache has %d objects for non-declared projects %s.\n"
                        "For using it, consider including relevant project(s) "
                        "declaration(s) in ~/.climaf and restarting CliMAF.\n"
                        "You can also declare these projects right now and call 'csync(True)'\n"
                        "Or you can erase corresponding data by 'crm(pattern=...project name...)'"% \
                        (len(crs_not_yet_evaluable),`list(projects)`))
        allow_error_on_ds(False)
Esempio n. 10
0
def read(filename):
    """
    Read macro dictionary from filename (a JSON file), and add its entries
    to cmacros[] through macro()

    Logs an info message and does nothing more when the file cannot be
    read or parsed
    """
    import json
    global cmacros
    macros_texts=None
    try :
        # open() replaces the py2-only file() builtin; 'with' guarantees the
        # handle is closed even when json parsing fails
        with open(os.path.expanduser(filename), "r") as macrofile :
            clogger.debug("Macrofile %s read"%(macrofile))
            macros_texts=json.load(macrofile)
            clogger.debug("After reading file %s, macros=%s"%(macrofile,repr(macros_texts)))
    except (IOError, OSError, ValueError):
        # ValueError covers malformed JSON content
        clogger.info("Issue reading macro file %s ", filename)
    if macros_texts :
        for m in macros_texts :
            clogger.debug("loading macro %s=%s"%(m,macros_texts[m]))
            macro(str(m),str(macros_texts[m]))
Esempio n. 11
0
def cdrop(obj, rm=True) :
    """
    Deletes the cached file for a CliMAF object, if it exists

    Args:
     obj (cobject or string) : object to delete, or its string representation (CRS)

     rm (bool) : for advanced use only; should we actually delete (rm) the file, or just forget it in CliMAF cache index

    Returns:
     None if object does not exists, False if failing to delete, True if OK

    Example ::

    >>> dg=ds(project='example', simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
    >>> f=cfile(dg)
    >>> os.system('ls -al '+f)
    >>> cdrop(dg)

    """
    global crs2filename

    # Build the CRS string which indexes the object in the cache
    if isinstance(obj,cobject) :
        crs=repr(obj)  # repr() instead of the deprecated backquote syntax
        if isinstance(obj, cdataset) : crs="select("+crs+")"
    elif type(obj) is str : crs=obj
    else :
        clogger.error("%s is not a CliMAF object"%repr(obj))
        return
    if crs not in crs2filename :
        clogger.info("%s is not cached"%crs)
        return None
    clogger.info("discarding cached value for "+crs)
    fil=crs2filename.pop(crs)
    if not rm :
        # Forget-only mode : index entry dropped, file kept. Report success
        # (the original fell through and returned None, contradicting the
        # documented 'True if OK')
        return True
    try :
        os.remove(fil)
        return True
    except OSError :
        clogger.warning("When trying to remove %s : file does not exist in cache"%crs)
        return False
Esempio n. 12
0
def read(filename):
    """
    Read macro dictionary from filename (a JSON file), and add its entries
    to cmacros[] through macro()

    Logs an info message and does nothing more when the file cannot be
    read or parsed
    """
    import json
    global cmacros
    macros_texts = None
    try:
        # open() replaces the py2-only file() builtin; 'with' guarantees the
        # handle is closed even when json parsing fails
        with open(os.path.expanduser(filename), "r") as macrofile:
            clogger.debug("Macrofile %s read" % (macrofile))
            macros_texts = json.load(macrofile)
            clogger.debug("After reading file %s, macros=%s" %
                          (macrofile, repr(macros_texts)))
    except (IOError, OSError, ValueError):
        # ValueError covers malformed JSON content
        clogger.info("Issue reading macro file %s ", filename)
    if macros_texts:
        for m in macros_texts:
            clogger.debug("loading macro %s=%s" % (m, macros_texts[m]))
            macro(str(m), str(macros_texts[m]))
Esempio n. 13
0
def cprotect(obj, stop=False):
    """
    Protects the cache file for a given object (or stops protection with arg 'stop=True').

    In order to erase it, argument 'force=True' must then be used with function
    :py:func:`~climaf.cache.craz` or :py:func:`~climaf.cache.cdrop`

    """
    # Work out the CRS string which indexes the object in the cache
    if isinstance(obj, cdataset):
        crs = "select(" + repr(obj) + ")"
    elif isinstance(obj, cobject):
        crs = repr(obj)
    elif type(obj) is str:
        crs = obj
    else:
        clogger.error("%s is not a CliMAF object" % repr(obj))
        return
    # Nothing to (un)protect when the object has no cache file yet
    if crs not in crs2filename:
        clogger.info("%s is not (yet) cached; use cfile() to cache it" % crs)
        return
    # Toggle the write permission of the cached file through chmod
    if stop is False:
        clogger.info("Protecting cached value for " + crs)
        chmod_flags = "chmod -w "
    else:
        clogger.info("Stopping protection on cached value for " + crs)
        chmod_flags = "chmod +w "
    os.system(chmod_flags + crs2filename[crs])
    return
Esempio n. 14
0
def csync(update=False):
    """
    Merges current in-memory cache index and current on-file cache index
    for updating both

    If arg `update` is True, additionally ensures consistency between files
    set and index content, either :

    - if cache.stamping is true, by reading CRS in all files
    - else, by removing files which are not in the index; this may erase
      result files which have been computed by another running
      instance of CliMAF
    """
    #
    import pickle
    global cacheIndexFileName

    # Merge index on file and index in memory
    file_index = cload(True)
    crs2filename.update(file_index)

    # check if cache index is up to date; if not
    # enforce consistency
    if update:
        clogger.info("Listing crs from files present in cache")
        # sorted() works both on lists and on py3 dict views, unlike the
        # original in-place .sort() on .values()
        files_in_cache = sorted(list_cache())
        files_in_index = sorted(crs2filename.values())
        if files_in_index != files_in_cache:
            if stamping:
                clogger.info("Rebuilding cache index from file content")
                rebuild()
            else:
                clogger.info('Removing cache files which content is not known')
                for fil in files_in_cache:
                    if fil not in files_in_index:
                        # Original used the broken format string "rm %",
                        # which raised ValueError on the first removal
                        os.system("rm %s" % fil)
                    # Should also remove empty files, as soon as
                    # file creation will be atomic enough
    # Save to disk ('with' + open() replaces the py2-only file() builtin
    # and guarantees closing even when pickling fails)
    try:
        with open(os.path.expanduser(cacheIndexFileName), "w") as cacheIndexFile:
            pickle.dump(crs2filename, cacheIndexFile)
    except (IOError, OSError):
        clogger.info("No cache index file yet")
Esempio n. 15
0
def selectGenericFiles(urls, return_wildcards=None,merge_periods_on=None,**kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned 
    by this function is composed of files which :

    - match the patterns in ``url`` once these patterns are instantiated by 
      the values in kwargs, and 

     - contain the ``variable`` provided in kwargs

     - match the `period`` provided in kwargs
    
    In the pattern strings, no keyword is mandatory. However, for remote files,
    filename pattern must include ${varname}, which is instanciated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this is  
    for the sake of efficiency (please complain if inadequate)
   
    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*${PERIOD}*.nc)']
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:
    
    - ${variable} : use it if the files are split by variable and 
      filenames do include the variable name, as this speed up the search

    - ${PERIOD} : use it for indicating the period covered by each file, if this 
      is applicable in the file naming; this period can appear in filenames as 
      YYYY, YYYYMM, YYYYMMDD, YYYYMMDDHHMM, either once only, or twice with 
      separator ='-' or '_'

    - wildcards '?' and '*' for matching respectively one and any number of characters


    """
    def store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards, merge_periods_on=None, 
                                    fperiod=None,periods=None,periods_dict=None):
        """"
        """
        if fperiod is not None and periods is not None :
            clogger.debug('Adding period %s'%fperiod)
            periods.append(fperiod)
        #
        for kw in kwargs :
            it=re.finditer(facets_regexp,f)
            for oc in it :
                try : facet_value=oc.group(kw)
                except : continue
                if type(kwargs[kw]) is str and ("*" in kwargs[kw] or "?" in kwargs[kw] ):
                    if facet_value is not None :
                        if kw not in wildcards : wildcards[kw]=set()
                        wildcards[kw].add(facet_value)
                        clogger.debug("Discover %s=%s for file=%s"%(kw,facet_value,f))
                    else :
                        clogger.debug("Logic issue for kw=%s and file=%s"%(kw,f))
                    #
                    if fperiod is not None and periods is not None :
                        if merge_periods_on is None : key=None
                        elif kw == merge_periods_on : key=facet_value
                        else :
                            #print "Skipping for kw=%s,sort=%s"%(kw,merge_periods_on)
                            continue                        
                        if key not in periods_dict: periods_dict[key]=set()
                        #print "adding period %s for key %s"%(fperiod,key) 
                        periods_dict[key].add(fperiod)
                    else:
                        pass
                        #print "no Adding period for %s=%s for %s"%(kw,facet_value,f)
        #print "end of store, periods_dict=",periods_dict, "wild=",wildcards

    rep=[]
    #
    periods=None # a list of periods available
    periods_dict=dict()
    #
    period=kwargs['period'] ;
    if period == "*" :
        periods=[] # List of all periods
    elif type(period) is str : period=init_period(period)
    #
    variable=kwargs['variable']
    altvar=kwargs.get('filenameVar',variable)
    #
    # dicts of date patterns, for globbing and for regexp
    #
    digit="[0-9]"
    date_glob_patt={ "${PERIOD}" : "*" } 
    # an ordered list of dates keywords
    date_keywords=date_glob_patt.keys() ; date_keywords.sort(reverse=True)
    #
    annee="%s{4}"%digit
    mois="(01|02|03|04|05|06|07|08|09|10|11|12)"
    jour="([0-3][0-9])"
    heure="(00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23)"
    minutes="[0-5][0-9]"
    date="%s(%s(%s(%s(%s)?)?)?)?"%(annee,mois,jour,heure,minutes)
    rperiod="(?P<period>(?P<start>%s)([_-](?P<end>%s))?)"%(date,date)
    date_regexp_patt={ "${PERIOD}" : rperiod } 
    # an ordered list of dates regexp keywords
    date_regexp_keywords=date_regexp_patt.keys() ; date_regexp_keywords.sort(reverse=True)
    #
    #
    for l in urls :
        # Instantiate keywords in pattern with attributes values
        remote_prefix="" ;
        if re.findall(".*:.*",l) :
            remote_prefix=':'.join(l.split(":")[0:-1])+':'
        basename=l.split(":")[-1] # This discard the remote_prefix if any
        basename=basename.replace("//","/")
        my_template=Template(basename)
        template=my_template.safe_substitute(**kwargs)
        #print "template after attributes replace : "+template
        #
        # Construct a pattern for globbing dates
        temp2=template
        for k in date_keywords : temp2=temp2.replace(k,date_glob_patt[k])
        # Do globbing with plain varname
        if remote_prefix : 
            lfiles=sorted(glob_remote_data(remote_prefix, temp2))
            clogger.debug("Remote globbing %d files for varname on %s : "%\
                          (len(lfiles),remote_prefix+temp2))
        else: # local data
            lfiles=sorted(glob.glob(temp2))
            clogger.debug("Before regexp filtering : Globbing %d files for varname on %s : "%(len(lfiles),temp2))
            # Must filter with regexp, because * with glob is too inclusive
            alt=[]
            for f in lfiles :
                for k in date_keywords :
                    if re.search(date_regexp_patt[k],f) :
                        alt.append(f)
                        continue
            lfiles=alt
            clogger.debug("Globbing %d files for varname on %s : "%(len(lfiles),temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles)==0 and "filenameVar" in kwargs and kwargs['filenameVar'] :
            # Change value of facet 'variable'
            kwargs['variable']=kwargs['filenameVar']
            template=my_template.safe_substitute(**kwargs)
            temp2=template
            for k in date_keywords : temp2=temp2.replace(k,date_glob_patt[k])
            #
            # Do globbing with fileVarname
            if remote_prefix : # 
                lfiles=sorted(glob_remote_data(remote_prefix, temp2))
                clogger.debug("Remote globbing %d files for filenamevar on %s: "%\
                              (len(lfiles),remote_prefix+temp2))
            else: # local data
                lfiles=sorted(glob.glob(temp2))
                # Must filter with regexp, because * with glob is too inclusive
                alt=[]
                for f in lfiles :
                    for k in date_keywords :
                        if re.search(date_regexp_patt[k],f) :
                            alt.append(f)
                            continue
                lfiles=alt
                clogger.debug("Globbing %d files for filenamevar on %s: "%(len(lfiles),temp2))
        #
        # For discovering values for those facets which are a wildcard,
        # construct a regexp with a group name for all facets (but period)
        alt_basename=basename.replace("?",".").replace("*",".*")
        alt_kwargs=kwargs.copy()
        for kw in kwargs :
            if type(kwargs[kw]) is str : # This excludes period attribute, which has a type
                alt_kwargs[kw]=kwargs[kw].replace("?",".").replace("*",".*")
                alt_basename=alt_basename.replace(r"${%s}"%kw,r"(?P<%s>%s)"%(kw,alt_kwargs[kw]),1)
        facets_regexp=Template(alt_basename).safe_substitute(**alt_kwargs)
        for k in date_regexp_keywords :
            facets_regexp=facets_regexp.replace(k,date_regexp_patt[k],1)
            facets_regexp=facets_regexp.replace(k,".*")
        wildcards=dict()
        #print "facets_regexp=",facets_regexp
        #
        # Construct regexp for extracting dates from filename
        date_regexp=None
        template_toreg=template.replace("*",".*").replace("?",r".").replace("+","\+")
        #print "template before searching dates : "+template_toreg
        for key in date_regexp_keywords :
            #print "searchin "+key+" in "+template
            start=template_toreg.find(key)
            if (start>=0 ) :
                date_regexp=template_toreg.replace(key,date_regexp_patt[key],1)
                #print "found ",key," dateregexp ->",date_regexp
                hasEnd=False
                start=date_regexp.find(key)
                #start=date_regexp.find(key)
                if (start >=0 ) :
                    hasEnd=True
                    date_regexp=date_regexp.replace(key,date_regexp_patt[key],1)
                    #date_regexp=date_regexp.replace(key,date_regexp_patt[key],1)
                break
        #print "date_regexp before searching dates : "+date_regexp
        #
        for f in lfiles :
            #print "processing file "+f
            #
            # Extract file time period
            #
            fperiod=None
            if date_regexp :
                if "P<period>" in date_regexp :
                    #print "date_rexgep=",date_regexp
                    #print "f=",f
                    #print "period=",re.sub(date_regexp,r'\g<period>',f)
                    tperiod=re.sub(date_regexp,r'\g<period>',f)
                    if tperiod==f :
                        raise classes.Climaf_Error("Cannot find a period in %s with regexp %s"%(f,date_regexp))
                    fperiod=init_period(tperiod)
                else:
                    date_regexp0=date_regexp 
                    #print "date_regexp for extracting dates : "+date_regexp0, "file="+f
                    start=re.sub(date_regexp0,r'\1',f)
                    if start==f:
                        raise Climaf_Data_Error("Start period not found in %s using regexp %s"%(f,regexp0)) #? 
                    if hasEnd :
                        end=re.sub(date_regexp0,r'\2',f)
                        fperiod=init_period("%s-%s"%(start,end))
                    else :
                        fperiod=init_period(start)
                #print "period for file %s is %s"%(f,fperiod)
                #
                # Filter file time period against required period
            else :
                if ( 'frequency' in kwargs and ((kwargs['frequency']=="fx") or \
                    kwargs['frequency']=="seasonnal" or kwargs['frequency']=="annual_cycle" )) :
                    # local data
                    if not remote_prefix and \
                       ( (basename.find("${variable}")>=0) or variable=='*' or \
                         fileHasVar(f,variable) or (variable != altvar and fileHasVar(f,altvar)) ) :
                        clogger.debug("adding fixed field :"+f)
                        store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on)
                        rep.append(f)
                    # remote data
                    elif remote_prefix :
                        if (basename.find("${variable}")>=0) or variable=='*' or \
                           (variable != altvar and (f.find(altvar)>=0) ):
                            clogger.debug("adding fixed field :"+remote_prefix+f)
                            store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on)
                            rep.append(remote_prefix+f)
                        else:
                            raise classes.Climaf_Error(
                                "For remote files, filename pattern (%s) should include ${varname} "+\
                                "(which is instanciated by variable name or filenameVar)"%f)
                else :
                    clogger.info("Cannot yet filter files re. time using only file content.")
                    store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on)
                    rep.append(f)
            
            #
            # If file period matches requested period, check similarly for variable
            #
            #print "fperiod=",fperiod
            #print "periods=",periods
            #print "inter=",period.intersects(fperiod)
            #print "date_regexp=",date_regexp
            if (fperiod and ( periods is not None or period.intersects(fperiod) )) \
               or not date_regexp :
                #
                clogger.debug('Period is OK - Considering variable filtering on %s and %s for %s'%(variable,altvar,f)) 
                # Filter against variable 
                if (l.find("${variable}")>=0):
                    clogger.debug('appending %s based on variable in filename'%f)
                    store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on,
                                                fperiod,periods,periods_dict)
                    rep.append(remote_prefix+f)
                    continue    
                if (f not in rep):
                    # local data
                    if not remote_prefix and \
                        (variable=='*' or "," in variable or fileHasVar(f,variable) or \
                        (altvar != variable and fileHasVar(f,altvar))) :
                        # Should check time period in the file if not date_regexp
                        clogger.debug('appending %s based on multi-var or var exists in file '%f)
                        store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on,
                                                    fperiod,periods,periods_dict)
                        rep.append(f)
                        continue
                    # remote data
                    elif remote_prefix  : 
                        if variable=='*' or "," in variable or \
                            (variable != altvar and (f.find(altvar)>=0) ):
                            # Should check time period in the file if not date_regexp
                            clogger.debug('appending %s based on multi-var or altvar '%(remote_prefix+f))
                            store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards, merge_periods_on,
                                                        fperiod,periods,periods_dict)
                            rep.append(remote_prefix+f)
                            continue
                        else:
                            mess="For remote files, filename pattern (%s) should include"%(remote_prefix+f)
                            mess+=" ${varname} (which is instanciated by variable name or filenameVar)"
                            raise classes.Climaf_Error(mess)
            else:
                if not fperiod :
                    clogger.debug('not appending %s because period is None '%f)
                elif not period.intersects(fperiod) :
                        clogger.debug('not appending %s because period doesn t intersect %s'%(f,period))
                else:
                    clogger.debug('not appending %s for some other reason %s'%(f))

        # Break on first url with any matching data
        if len(rep)>0 :
            clogger.debug('url %s does match for '%l + `kwargs`)
            break

    #  For wildcard facets, discover facet values + checks
    for facet in wildcards:
        s=wildcards[facet]
        if return_wildcards is not None :
            if facet=="period" :
                #print "s=",s," periods_dict=",periods_dict
                for val in periods_dict : 
                    periods_dict[val]=sort_periods_list(list(periods_dict[val]))
                clogger.info("Attribute period='*' has values %s"%(periods_dict))
                return_wildcards["period"]=periods_dict
            else:
                if len(s) == 1 :
                    s=s.pop()
                    clogger.info("Attribute %s='%s' has matching value '%s'"%(facet,kwargs[facet],s))
                    return_wildcards[facet]=s
                else:
                    rep=list(s); rep.sort()
                    return_wildcards[facet]=rep
                    message="Attribute %s='%s' has multiple values : %s"%(facet,kwargs[facet],list(s))
                    if return_wildcards : clogger.info(message)
                    else: clogger.error(message)
                s=return_wildcards[facet]
        else:
            clogger.debug("return_wildcards is None")
    return rep
Esempio n. 16
0
def cdrop(obj, rm=True, force=False):
    """
    Deletes the cached file for a CliMAF object, if it exists

    Args:
     obj (cobject or string) : object to delete, or its string representation (CRS)

     force (bool) : should we delete the object even if it is 'protected'
       (i.e. not user-writable); implemented by restoring write permission first

     rm (bool) : for advanced use only; should we actually delete (rm) the file,
       or just forget it in CliMAF cache index

    Returns:
     None if object does not exists, False if failing to delete, True if OK

    Example ::

    >>> dg=ds(project='example', simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
    >>> f=cfile(dg)
    >>> os.system('ls -al '+f)
    >>> cdrop(dg)

    """
    global crs2filename
    global dropped_crs

    # Normalize the argument to a CRS string, which is the cache index key
    if isinstance(obj, cobject):
        crs = repr(obj)
        # Datasets are cached under their 'select(...)' expression
        if isinstance(obj, cdataset):
            crs = "select(" + crs + ")"
    elif type(obj) is str:
        crs = obj
    else:
        clogger.error("%s is not a CliMAF object" % repr(obj))
        return
    if crs not in crs2filename:
        clogger.info("%s is not cached" % crs)
        return None
    clogger.info("Discarding cached value for %s (expect if protected)" %
                 crs)
    fil = crs2filename[crs]
    if not rm:
        # Advanced use: only forget the file in the cache index,
        # without removing it from disk
        crs2filename.pop(crs)
        dropped_crs.append(crs)
        return True
    try:
        if force:
            # Restore write permission so that removal can proceed
            os.system("chmod +w " + fil)
        if not os.access(fil, os.W_OK):
            # Non-writable files are considered 'protected' and kept
            clogger.info("Object %s is protected" % crs)
            return
        path_file = os.path.dirname(fil)
        os.remove(fil)
        crs2filename.pop(crs)
        dropped_crs.append(crs)
        try:
            # Clean up the containing directory if it is now empty
            os.rmdir(path_file)
        except OSError as ex:
            clogger.warning(ex)

        return True
    except OSError:
        # Narrowed from a bare 'except': only filesystem errors are expected here
        clogger.warning(
            "When trying to remove %s : file does not exist in cache" %
            crs)
        return False
Esempio n. 17
0
def register(filename, crs, outfilename=None):
    """
    Adds in FILE a metadata named 'CRS_def' and with value CRS, and a
    metadata 'CLiMAF' with CliMAF version and ref URL

    Records this FILE in dict crs2filename

    If OUTFILENAME is not None, FILENAME is a temporary file and
    it's OUTFILENAME which is recorded in dict crs2filename

    Silently skip non-existing files

    Returns True on success, None on error (or exits on stamping/move failure)
    """
    # First read index from file if it is yet empty - No : done at startup
    # if len(crs2filename.keys()) == 0 : cload()
    # It appears that we have to let some time to the file system  for updating its inode tables
    global dropped_crs
    if not stamping:
        clogger.debug('No stamping')
        crs2filename[crs] = filename
        return True
    # Wait (up to ~5s) for the file to show up on the filesystem
    waited = 0
    while waited < 50 and not os.path.exists(filename):
        time.sleep(0.1)
        waited += 1
    if not os.path.exists(filename):
        clogger.error("file %s does not exist (for crs %s)" % (filename, crs))
        return None
    # Build the stamping command according to the file type.
    # Note: str.endswith is used (not re.findall(".nc$")) so that e.g.
    # 'file.inc' does not wrongly match the NetCDF case
    command = None
    if filename.endswith(".nc"):
        command = "ncatted -h -a CRS_def,global,o,c,\"%s\" -a CliMAF,global,o,c,\"CLImate Model Assessment " \
                  "Framework version %s (http://climaf.rtfd.org)\" %s" % (crs, version, filename)
    elif filename.endswith(".png"):
        crs2 = crs.replace("%", "\%")
        command = "convert -set \"CRS_def\" \"%s\" -set \"CliMAF\" \"CLImate Model Assessment Framework version " \
                  "%s (http://climaf.rtfd.org)\" %s %s.png && mv -f %s.png %s" % \
                  (crs2, version, filename, filename, filename, filename)
    elif filename.endswith(".pdf"):
        tmpfile = str(uuid.uuid4())
        command = "pdftk %s dump_data output %s && echo -e \"InfoBegin\nInfoKey: Keywords\nInfoValue: %s\" >> %s " \
                  "&& pdftk %s update_info %s output %s.pdf && mv -f %s.pdf %s && rm -f %s" % \
                  (filename, tmpfile, crs, tmpfile, filename, tmpfile, filename, filename, filename, tmpfile)
    elif filename.endswith(".eps"):
        command = 'exiv2 -M"add Xmp.dc.CliMAF CLImate Model Assessment Framework version %s ' \
                  '(http://climaf.rtfd.org)" -M"add Xmp.dc.CRS_def %s" %s' % \
                  (version, crs, filename)
    if command is None:
        # Previously this case crashed with a NameError on 'command'
        clogger.error("cannot stamp %s : unsupported file type (for crs %s)" %
                      (filename, crs))
        return None
    clogger.debug("trying stamping by %s" % command)
    if os.system(command) != 0:
        clogger.critical("cannot stamp by %s" % command)
        exit()
        return None
    if outfilename:
        # FILENAME was a temporary file; move it to its final location
        cmd = 'mv -f %s %s ' % (filename, outfilename)
        if os.system(cmd) != 0:
            # Fixed format string (was: "cannot move by" % cmd, a TypeError)
            clogger.critical("cannot move by %s" % cmd)
            exit()
            return None
        clogger.info("move %s as %s " % (filename, outfilename))
        registered = outfilename
    else:
        registered = filename
    clogger.info("%s registered as %s" % (crs, registered))
    crs2filename[crs] = registered
    if crs in dropped_crs:
        dropped_crs.remove(crs)
    return True
Esempio n. 18
0
def ceval_script (scriptCall,deep,recurse_list=None):
    """ Actually applies a CliMAF-declared script on a script_call object

    Prepares operands as files and builds the command from operands and
    parameters list. Assumes that scripts are described in dictionary
    'scripts' by templates as documented in operators.cscript

    Args:
     scriptCall (ctree) : the script call to evaluate
     deep (bool or None) : cache re-evaluation policy, forwarded to ceval()
     recurse_list (list of str) : CRS of objects already under evaluation,
       for loop detection; a fresh list is used when None

    Returns a CLiMAF cache data filename (or None if the script declares
    no output); raises Climaf_Driver_Error on any failure
    """
    # 'None' sentinel: a mutable default list would be shared across calls
    if recurse_list is None : recurse_list=[]
    script=operators.scripts[scriptCall.operator]
    template=Template(script.command)

    # Evaluate input data
    dict_invalues=dict()
    sizes=[]
    for op in scriptCall.operands :
        inValue=ceval(op,userflags=scriptCall.flags,format='file',deep=deep,
                      recurse_list=recurse_list)
        # '==' (not 'is') for the empty-string test : identity with a string
        # literal is implementation-dependent
        if inValue is None or inValue == "" :
            raise Climaf_Driver_Error("When evaluating %s : value for %s is None"\
                                      %(scriptCall.script,repr(op)))
        if isinstance(inValue,list) : size=len(inValue)
        else : size=1
        sizes.append(size)
        dict_invalues[op]=inValue
    #
    # Replace input data placeholders with filenames
    subdict=dict()
    opscrs=""
    if 0 in script.inputs :
        # Main (un-numbered) input : also drives var/period/domain keywords
        label,multiple,serie=script.inputs[0]
        op=scriptCall.operands[0]
        infile=dict_invalues[op]
        if not all(map(os.path.exists,infile.split(" "))) :
            raise Climaf_Driver_Error("Internal error : some input file does not exist among %s:"%(infile))
        subdict[ label ]=infile
        #if scriptCall.flags.canSelectVar :
        subdict["var"]=varOf(op)
        if isinstance(op,classes.cdataset) and op.alias and scriptCall.flags.canAlias:
            filevar,scale,offset,units,filenameVar,missing=op.alias
            #if script=="select" and ((varOf(op) != filevar) or scale != 1.0 or offset != 0.) :
            if ((varOf(op) != filevar) or scale != 1.0 or offset != 0.) :
                subdict["alias"]="%s,%s,%.4g,%.4g"%(varOf(op),filevar,scale,offset)
                subdict["var"]=filevar
            if units : subdict["units"]=units
            if scriptCall.flags.canMissing and missing :
                subdict["missing"]=missing
        if isinstance(op,classes.cens) :
            if not multiple :
                raise Climaf_Driver_Error(
                    "Script %s 's input #%s cannot accept ensemble %s"\
                        %(scriptCall.script,0,repr(op)))
            #subdict["labels"]=r'"'+reduce(lambda x,y : "'"+x+"' '"+y+"'", op.labels)+r'"'
            subdict["labels"]=reduce(lambda x,y : x+"$"+y, op.labels)
        per=timePeriod(op)
        if not per.fx and str(per) != "" and scriptCall.flags.canSelectTime:
            subdict["period"]=str(per)
            subdict["period_iso"]=per.iso()
        if scriptCall.flags.canSelectDomain :
            subdict["domain"]=domainOf(op)
    # Numbered (secondary) inputs : same processing, with _%d suffixed keywords
    i=0
    for op in scriptCall.operands :
        opscrs += op.crs+" - "
        infile=dict_invalues[op]
        if not all(map(os.path.exists,infile.split(" "))) :
            raise Climaf_Driver_Error("Internal error : some input file does not exist among %s:"%(infile))
        i+=1
        if ( i> 1 or 1 in script.inputs) :
            label,multiple,serie=script.inputs[i]
            subdict[ label ]=infile
            # Provide the name of the variable in input file if script allows for
            subdict["var_%d"%i]=varOf(op)
            if isinstance(op,classes.cdataset) and op.alias :
                filevar,scale,offset,units,filenameVar,missing =op.alias
                if (varOf(op) != filevar) or (scale != 1.0) or (offset != 0.) :
                    subdict["alias_%d"%i]="%s %s %f %f"%(varOf(op),filevar,scale,offset)
                    subdict["var_%d"%i]=filevar
                # (indentation normalized : these two lines were tab-indented)
                if units : subdict["units_%d"%i]=units
                if missing : subdict["missing_%d"%i]=missing
            # Provide period selection if script allows for
            per=timePeriod(op)
            if not per.fx and per != "":
                subdict["period_%d"%i]=str(per)
                subdict["period_iso_%d"%i]=per.iso()
            subdict["domain_%d"%i]=domainOf(op)
    clogger.debug("subdict for operands is "+repr(subdict))
    # substitution is deffered after scriptcall parameters evaluation, which may
    # redefine e.g period
    #
    # Provide one cache filename for each output and instantiates the command accordingly
    if script.outputFormat is not None :
        # Compute a filename for each ouptut
        # Un-named main output
        main_output_filename=cache.generateUniqueFileName(scriptCall.crs,
                                                          format=script.outputFormat)
        subdict["out"]=main_output_filename
        subdict["out_"+scriptCall.variable]=main_output_filename
        # Named outputs
        for output in scriptCall.outputs:
            subdict["out_"+output]=cache.generateUniqueFileName(scriptCall.crs+"."+output,\
                                                         format=script.outputFormat)
    # Account for script call parameters
    for p in scriptCall.parameters :
        #clogger.debug("processing parameter %s=%s"%(p,scriptCall.parameters[p]))
        subdict[p]=scriptCall.parameters[p]
        if p=="period" :
            subdict["period_iso"]=init_period(scriptCall.parameters[p]).iso()
    subdict["crs"]=opscrs.replace("'","")
    #
    # Combine CRS and possibly member_label to provide/complement title
    if 'title' not in subdict :
        if 'member_label' in subdict :
            subdict["title"]=subdict['member_label']
        else:
            subdict["title"]=subdict["crs"]
    else:
        if 'member_label' in subdict :
            subdict["title"]=subdict["title"]+" "+subdict['member_label']
            subdict.pop('member_label')
    #
    # Substitute all args
    template=template.safe_substitute(subdict)
    #
    # Allowing for some formal parameters to be missing in the actual call:
    #
    # Discard remaining substrings looking like :
    #  some_word='"${some_keyword}"'     , or:
    #  '"${some_keyword}"'
    template=re.sub(r'(\w*=)?(\'\")?\$\{\w*\}(\"\')?',r"",template)
    #
    # Discard remaining substrings looking like :
    #  some_word=${some_keyword}          , or
    #  ${some_keyword}
    template=re.sub(r"(\w*=)?\$\{\w*\}",r"",template)
    #
    # Launch script using command, and check termination
    #command="PATH=$PATH:"+operators.scriptsPath+template+fileVariables
    #command="echo '\n\nstdout and stderr of script call :\n\t "+template+\
    #         "\n\n'> scripts.out  ; "+ template+ " >> scripts.out 2>&1"

    tim1=time.time()
    clogger.info("Launching command:"+template)
    #
    command=subprocess.Popen(template, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
    command.wait()
    #
    # Capture the script's combined stdout/stderr both in memory and in ./last.out
    logfile=open('last.out', 'w')
    logfile.write("\n\nstdout and stderr of script call :\n\t "+template+"\n\n")
    command_std=""
    for line in command.stdout:
        command_std+=line
        logfile.write(line)
    logfile.close()

    if ( command.wait() == 0 ):
        if script.outputFormat is not None :
            # Tagging output files with their CliMAF Reference Syntax definition
            # Un-named main output
            ok = cache.register(main_output_filename,scriptCall.crs)
            # Named outputs
            for output in scriptCall.outputs:
                ok = ok and cache.register(subdict["out_"+output],\
                                           scriptCall.crs+"."+output)
            if ok :
                duration=time.time() - tim1
                print("Done in %.1f s with script computation for %s "%\
                          (duration,repr(scriptCall)),file=sys.stderr)
                clogger.debug("Done in %.1f s with script computation for "
                              "%s (command was :%s )"%\
                                  (duration,repr(scriptCall),template))
                return main_output_filename
            else :
                raise Climaf_Driver_Error("Some output missing when executing "
                                          ": %s. \n See last.out"%template)
        else :
            clogger.debug("script %s has no output"%script.name)
            return None
    else:
        clogger.debug("Full script output:\n"+command_std)
        comm2=subprocess.Popen(["tail", "-n", "10", "last.out"], stdout=subprocess.PIPE)
        clogger.error("Last lines of script output:\n"+comm2.stdout.read())
        raise Climaf_Driver_Error("Script failure for : %s. More details either in file "
                                  "./last.out or by re-runing with clog(\"debug\")" %template)
Esempio n. 19
0
def ceval(cobject, userflags=None, format="MaskedArray",
          deep=None, derived_list=None, recurse_list=None) :
    """
    Actually evaluates a CliMAF object, either as an in-memory data structure or
    as a string of filenames (which either represent a superset or exactly includes
    the desired data)

    - with arg deep=True , re-evaluates all components
    - with arg deep=False, re-evaluates top level operation
    - without arg deep   , use cached values as far as possible

    arg derived_list is the list of variables that have been considered as 'derived'
    (i.e. not natives) in upstream evaluations. It avoids to loop endlessly

    arg recurse_list records the CRS of objects already under evaluation
    """
    # 'None' sentinels : the previous mutable default lists ([]) were mutated
    # (appended to) during evaluation and hence leaked state across
    # unrelated top-level calls
    if derived_list is None : derived_list=[]
    if recurse_list is None : recurse_list=[]
    if format != 'MaskedArray' and format != 'file' and format != 'png' :
        raise Climaf_Driver_Error('Allowed formats yet are : "object", "file" and "png"')
    #
    if userflags is None : userflags=operators.scriptFlags()
    #
    # Next check is too crude for dealing with use of operator 'select'
    #if cobject.crs in recurse_list :
    #    clogger.critical("INTERNAL ERROR : infinite loop on object: "+cobject.crs)
    #    return None
    cindent()
    if isinstance(cobject,classes.cdataset):
        recurse_list.append(cobject.crs)
        clogger.debug("Evaluating dataset operand " + cobject.crs + "having kvp=" + repr(cobject.kvp))
        ds=cobject
        if ds.isLocal() or ds.isCached() :
            clogger.debug("Dataset %s is local or cached "%ds )
            #  if the data is local, then
            #   if the user can select the data and aggregate time, and requested format is
            #     'file' return the filenames
            #   else : read the data, create a cache file for that, and recurse
            # Go to derived variable evaluation if appicable
            #if ds.variable in operators.derived_variables and not ds.hasRawVariable() :
            if operators.is_derived_variable(ds.variable,ds.project) :
                if ds.variable in derived_list :
                    raise Climaf_Driver_Error("Loop detected while evaluating"
                         "derived variable "+ ds.variable + " " + repr(derived_list))
                derived=derive_variable(ds)
                clogger.debug("evaluating derived variable %s as %s"%\
                                  (ds.variable,repr(derived)))
                derived_value=ceval(derived, format=format, deep=deep,
                                    userflags=userflags,
                                    derived_list=derived_list+[ds.variable],
                                    recurse_list=recurse_list)
                if derived_value :
                    clogger.debug("succeeded in evaluating derived variable %s as %s"%\
                                      (ds.variable,repr(derived)))
                    set_variable(derived_value, ds.variable, format=format)
                cdedent()
                return(derived_value)
            elif ((userflags.canSelectVar   or ds.oneVarPerFile()   ) and \
                (userflags.canSelectTime    or ds.periodIsFine()    ) and \
                (userflags.canSelectDomain  or ds.domainIsFine()    ) and \
                (userflags.canAggregateTime or ds.periodHasOneFile()) and \
                (userflags.canAlias         or ds.hasExactVariable()) and \
                (userflags.canMissing       or ds.missingIsOK())      and \
                #(userflags.doSqueezeMembers or ds.hasOneMember()) and 
                (format == 'file')) :
                clogger.debug("Delivering file set or sets is OK for the target use")
                cdedent()
                rep=ds.baseFiles()
                if not rep : raise Climaf_Driver_Error("No file found for %s"%repr(ds))
                return(rep) # a single string with all filenames,
                               #or a list of such strings in case of ensembles
            else:
                clogger.debug("Must subset and/or aggregate and/or select "+
                              "var from data files and/or get data, or provide object result")
                ## extract=cread(ds)
                ## clogger.debug(" Done with subsetting and caching data files")
                ## cstore(extract) # extract should include the dataset def
                ## return ceval(extract,userflags,format)
                clogger.debug("Fetching/selection/aggregation is done using an external script for now - TBD")
                extract=capply('select',ds)
                if extract is None :
                    raise Climaf_Driver_Error("Cannot access dataset" + repr(ds))
                rep=ceval(extract,userflags=userflags,format=format)
                userflags.unset_selectors()
                cdedent()
                return rep
        else :
            # else (non-local and non-cached dataset)
            #   if the user can access the dataset by one of the dataset-specific protocols
            #   then assume it can also select on time and provide it with the address
            #   else : fetch the relevant selection of the data, and store it in cache
            clogger.debug("Dataset is remote " )
            if (userflags.canOpenDap and format == 'file' ) :
                clogger.debug("But user can OpenDAP " )
                cdedent()
                return(ds.adressOf())
            else :
                clogger.debug("Must remote read and cache " )
                rep=ceval(capply('remote_select',ds),userflags=userflags,format=format)
                userflags.unset_selectors()
                cdedent()
                return rep
    #
    elif isinstance(cobject,classes.ctree) or isinstance(cobject,classes.scriptChild) or \
             isinstance(cobject,classes.cpage) or isinstance(cobject,classes.cens) :
        recurse_list.append(cobject.buildcrs())
        clogger.debug("Evaluating compound object : " + repr(cobject))
        #################################################################
        if (deep is not None) : cache.cdrop(cobject.crs)
        #
        clogger.debug("Searching cache for exact object : " + repr(cobject))
        #################################################################
        filename=cache.hasExactObject(cobject)
        #filename=None
        if filename :
            clogger.info("Object found in cache: %s is at %s:  "%(cobject.crs,filename))
            cdedent()
            if format=='file' : return filename
            else: return cread(filename,varOf(cobject))
        if not isinstance(cobject,classes.cpage) and not isinstance(cobject,classes.cens) :
            #
            clogger.debug("Searching cache for including object for : " + repr(cobject))
            ########################################################################
            it,altperiod=cache.hasIncludingObject(cobject)
            #clogger.debug("Finished with searching cache for including object for : " + repr(cobject))
            #it=None
            if it :
                clogger.info("Including object found in cache : %s"%(it.crs))
                clogger.info("Selecting "+repr(cobject)+" out of it")
                # Just select (if necessary for the user) the portion relevant to the request
                rep=ceval_select(it,cobject,userflags,format,deep,derived_list, recurse_list)
                cdedent()
                return rep
            #
            clogger.debug("Searching cache for begin  object for : " + repr(cobject))
            ########################################################################
            it,comp_period=cache.hasBeginObject(cobject)
            clogger.debug("Finished with searching cache for begin  object for : " + repr(cobject))
            #it=None
            if it :
                clogger.info("partial result found in cache for %s : %s"%\
                             (cobject.crs,it.crs))
                begcrs=it.crs
                # Turn object for begin in complement object for end, and eval it
                it.setperiod(comp_period)
                ceval(it,userflags,format,deep,derived_list,recurse_list)
                if (format == 'file') :
                    rep=cache.complement(begcrs,it.crs,cobject.crs)
                    cdedent()
                    return rep
                else : raise Climaf_Driver_Error("cannot yet complement except for files")
            #
            clogger.info("nothing relevant found in cache for %s"%cobject.crs)
        #
        if deep==False : deep=None
        if isinstance(cobject,classes.ctree)  :
            #
            # the cache doesn't have a similar tree, let us recursively eval subtrees
            ##########################################################################
            # TBD  : analyze if the dataset is remote and the remote place 'offers' the operator
            if cobject.operator in operators.scripts :
                file=ceval_script(cobject,deep,recurse_list=recurse_list) # Does return a filename, or list of filenames
                cdedent()
                if ( format == 'file' ) : return (file)
                else : return cread(file,varOf(cobject))
            elif cobject.operator in operators.operators :
                obj=ceval_operator(cobject,deep)
                cdedent()
                if (format == 'file' ) :
                    rep=cstore(obj) ; return rep
                else : return(obj)
            else :
                raise Climaf_Driver_Error("operator %s is not a script nor known operator",str(cobject.operator))
        elif isinstance(cobject,classes.scriptChild) :
            # Force evaluation of 'father' script
            if ceval_script(cobject.father,deep,recurse_list=recurse_list)  is not None :
                # Re-evaluate, which should succeed using cache
                rep=ceval(cobject,userflags,format,deep,recurse_list=recurse_list)
                cdedent()
                return rep
            else :
                raise Climaf_Driver_Error("generating script aborted for "+cobject.father.crs)
        elif isinstance(cobject,classes.cpage) :
            file=cfilePage(cobject, deep, recurse_list=recurse_list)
            cdedent()
            if ( format == 'file' ) : return (file)
            else : return cread(file)
        elif isinstance(cobject,classes.cens) :
            rep=[]
            for member in cobject.members :
                # Each member gets its own copy of the flags, which may be mutated
                rep.append(ceval(member,copy.copy(userflags),format,deep,recurse_list=recurse_list))
            if (format=="file") : return(reduce(lambda x,y : x+" "+y, rep))
            else : return rep
        else :
            raise Climaf_Driver_Error("Internal logic error")
    elif isinstance(cobject,str) :
        clogger.debug("Evaluating object from crs : %s"%cobject)
        raise Climaf_Driver_Error("Evaluation from CRS is not yet implemented ( %s )"%cobject)
    else :
        raise Climaf_Driver_Error("argument " +repr(cobject)+" is not (yet) managed")
Esempio n. 20
0
def selectGenericFiles(urls, **kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned
    by this function is composed of files which :

    - match the patterns in ``urls`` once these patterns are instantiated by
      the values in kwargs, and

    - contain the ``variable`` provided in kwargs

    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory. However, for remote files,
    filename pattern must include ${varname}, which is instanciated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this is
    for the sake of efficiency (please complain if inadequate)

    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc)']
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and
      filenames do include the variable name, as this speed up the search

    - YYYY, YYYYMM, YYYYMMDD, YYYYMMDDHH : use it for indicating the start date of
      the period covered by each file, if this is applicable in the
      file naming; use a second time for end date, if applicable
      (otherwise the assumption is that the whole year -resp. month or
      day- is included in the file)

    - wildcards '?' and '*' for matching respectively one and any number of characters
    """
    rep = []
    period = kwargs['period']
    if type(period) is str: period = init_period(period)
    variable = kwargs['variable']
    # filenameVar, when provided, is an alternate variable name to try in filenames
    altvar = kwargs.get('filenameVar', variable)
    # Map date keywords to globbing patterns; keywords are processed longest
    # first so that e.g. 'YYYYMM' is not partially consumed as 'YYYY'
    dt = dict(YYYY="????",
              YYYYMM="??????",
              YYYYMMDD="????????",
              YYYYMMDDHH="??????????")
    lkeys = sorted(dt.keys(), reverse=True)
    # Map date keywords to capturing regexps, for extracting dates from filenames
    dr = dict(YYYY="([0-9]{4})",
              YYYYMM="([0-9]{6})",
              YYYYMMDD="([0-9]{8})",
              YYYYMMDDHH="([0-9]{10})")
    rkeys = sorted(dr.keys(), reverse=True)
    #
    for l in urls:
        # Instantiate keywords in pattern with attributes values;
        # a ':' in the pattern denotes remote data (host:path)
        if re.findall(".*:.*", l):  # remote data
            remote_prefix = ':'.join(l.split(":")[0:-1]) + ':'
            template = Template(l.split(":")[-1]).safe_substitute(**kwargs)
        else:  # local data
            remote_prefix = ""
            template = Template(l).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in lkeys:
            temp2 = temp2.replace(k, dt[k])
        if remote_prefix:
            lfiles = sorted(glob_remote_data(remote_prefix, temp2))
            clogger.debug("Remote globbing %d files for varname on %s : " %
                          (len(lfiles), remote_prefix + temp2))
        else:  # local data
            lfiles = sorted(glob.glob(temp2))
            clogger.debug("Globbing %d files for varname on %s : " %
                          (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles) == 0 and "filenameVar" in kwargs and kwargs['filenameVar']:
            # Change value of facet 'variable'
            kwargs['variable'] = kwargs['filenameVar']
            if remote_prefix:  # remote data
                template = Template(l.split(":")[-1]).safe_substitute(**kwargs)
            else:  # local data
                template = Template(l).safe_substitute(**kwargs)
            temp2 = template
            for k in lkeys:
                temp2 = temp2.replace(k, dt[k])
            #
            if remote_prefix:
                lfiles = sorted(glob_remote_data(remote_prefix, temp2))
                clogger.debug("Globbing %d files for filenamevar on %s: " %
                              (len(lfiles), remote_prefix + temp2))
            else:  # local data
                lfiles = sorted(glob.glob(temp2))
                clogger.debug("Globbing %d files for filenamevar on %s: " %
                              (len(lfiles), temp2))
        #
        # Construct regexp for extracting dates from filename : the first
        # occurrence of a date keyword captures the start date, a second
        # occurrence (if any) captures the end date
        regexp = None
        for key in rkeys:
            start = template.find(key)
            if start >= 0:
                regexp = template.replace(key, dr[key], 1)
                hasEnd = False
                start = regexp.find(key)
                if start >= 0:
                    hasEnd = True
                    regexp = regexp.replace(key, dr[key], 1)
                break
        #
        for f in lfiles:
            # Analyze file time period
            fperiod = None
            if regexp:
                # Translate remaining glob wildcards to regexp syntax
                regexp0 = regexp.replace("*", ".*").replace("?", r".")
                start = re.sub(regexp0, r'\1', f)
                if start == f:
                    raise Climaf_Data_Error("Start period not found")
                if hasEnd:
                    end = re.sub(regexp0, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
            else:
                # No date keyword in pattern : fixed fields are accepted as is,
                # other frequencies cannot be time-filtered from filename
                if ( 'frequency' in kwargs and ((kwargs['frequency']=="fx") or \
                    kwargs['frequency']=="seasonnal" or kwargs['frequency']=="annual_cycle" )) :
                    # local data
                    # Bug fix : this branch handles *local* files, hence must
                    # apply when remote_prefix is empty; it previously tested
                    # 'remote_prefix and ...', which made it unreachable for
                    # local data and shadowed the remote branch below
                    if not remote_prefix and \
                       ( (l.find("${variable}")>=0) or variable=='*' or \
                         fileHasVar(f,variable) or (variable != altvar and fileHasVar(f,altvar)) ) :
                        clogger.debug("adding fixed field :" + f)
                        rep.append(f)
                    # remote data
                    # Bug fix : was 'remote_prefix is not ""', an identity
                    # comparison with a literal; test truthiness instead
                    elif remote_prefix:
                        if (l.split(":")[-1].find("${variable}")>=0) or variable=='*' or \
                           (variable != altvar and (f.find(altvar)>=0) ):
                            clogger.debug("adding fixed field :" +
                                          remote_prefix + f)
                            rep.append(remote_prefix + f)
                        else:
                            raise Climaf_Data_Error(
                                "For remote files, filename pattern (%s) should include ${varname} (which is instanciated by variable name or filenameVar)"
                                % f)
                else:
                    clogger.info(
                        "Cannot yet filter files re. time using only file content."
                    )
                    rep.append(f)

            # NOTE(review): when regexp is None, the branch above may already
            # have appended f, and 'not regexp' lets execution enter the
            # variable filtering below too, which can append a duplicate when
            # the pattern holds ${variable} -- confirm intended behaviour
            if (fperiod and period.intersects(fperiod)) or not regexp:
                clogger.debug(
                    'Period is OK - Considering variable filtering on %s and %s for %s'
                    % (variable, altvar, f))
                # Filter against variable
                if (l.find("${variable}") >= 0):
                    clogger.debug(
                        'appending %s based on variable in filename' % f)
                    rep.append(remote_prefix + f)
                    continue
                if (f not in rep):
                    # local data
                    if not remote_prefix and \
                        (variable=='*' or "," in variable or fileHasVar(f,variable) or \
                        (altvar != variable and fileHasVar(f,altvar))) :
                        # Should check time period in the file if not regexp
                        clogger.debug(
                            'appending %s based on multi-var or var exists in file '
                            % f)
                        rep.append(f)
                        continue
                    # remote data
                    elif remote_prefix:
                        if variable=='*' or "," in variable or \
                            (variable != altvar and (f.find(altvar)>=0) ):
                            # Should check time period in the file if not regexp
                            clogger.debug(
                                'appending %s based on multi-var or altvar ' %
                                (remote_prefix + f))
                            rep.append(remote_prefix + f)
                            continue
                        else:
                            mess = "For remote files, filename pattern (%s) should include" % (
                                remote_prefix + f)
                            mess += " ${varname} (which is instanciated by variable name or filenameVar)"
                            raise Climaf_Data_Error(mess)
            else:
                if not fperiod:
                    clogger.debug('not appending %s because period is None ' %
                                  f)
                else:
                    if not period.intersects(fperiod):
                        clogger.debug(
                            'not appending %s because period doesn t intersect %s'
                            % (f, period))

        # Break on first url with any matching data
        if len(rep) > 0:
            clogger.debug('url %s does match for ' % l + repr(kwargs))
            break
    return rep