Example #1
0
def selectEmFiles(**kwargs):
    """
    Return the list of EM data files matching the provided attributes.

    For each realm, the data directory is read from the first line matching
    ``^export EM_DIRECTORY_<realm><freq>=`` in file
    ``$EM_HOME/expe_<simulation>`` (searched using ``grep``). A file of that
    directory is selected when its period, as returned by ``periodOfEmFile``,
    intersects ``period`` and when ``fileHasVar`` tells that it includes
    ``variable``.

    Keyword arguments (all mandatory; a missing one raises ``KeyError``):

    - ``simulation`` : name of the simulation, used in the ``expe_`` file name
    - ``frequency`` : data frequency; "mon" and "3h" are translated to the
      suffixes "" and "_3h", any other value is used as is
    - ``variable`` : name of the variable which must be present in the files
    - ``period`` : object with an ``intersects`` method
    - ``realm`` : a single realm letter, or "*" for "A", "L", "O" and "I"

    Returns the list of selected file paths; it is empty, and an error is
    logged, when environment variable EM_HOME is not set.
    """
    # For A and L : mon, day1, day2, 6hLev, 6hPlev, 3h
    simulation = kwargs['simulation']
    frequency = kwargs['frequency']
    variable = kwargs['variable']
    period = kwargs['period']
    realm = kwargs['realm']
    #
    freqs = {"mon": "", "3h": "_3h"}
    f = freqs.get(frequency, frequency)
    rep = []
    #
    # Without EM_HOME there is no file to search; report it explicitly
    # rather than failing inside os.path.expanduser
    em_home = os.getenv("EM_HOME")
    if not em_home:
        clogger.error("Environment variable EM_HOME is not set; cannot "
                      "locate data for simulation " + simulation)
        return rep
    expe_file = os.path.expanduser(em_home) + "/expe_" + simulation
    # Must look for all realms, here identified by a single letter
    if realm == "*":
        lrealm = ["A", "L", "O", "I"]
    else:
        lrealm = [realm]
    for realm in lrealm:
        clogger.debug("Looking for realm " + realm)
        # Use EM data for finding data dir
        freq_for_em = f
        if realm == 'I':
            freq_for_em = ""  # This is a special case ...
        command = [
            "grep", "^export EM_DIRECTORY_" + realm + freq_for_em + "=",
            expe_file
        ]
        try:
            # universal_newlines=True : get text rather than bytes from pipes
            ex = subprocess.Popen(command,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  universal_newlines=True)
        except (OSError, ValueError):
            clogger.error("Issue getting archive_location for " + simulation +
                          " for realm " + realm + " with: " + repr(command))
            break
        # communicate() reads both pipes until the process ends, which
        # cannot block on a full pipe buffer as wait() + read() can
        output = ex.communicate()[0]
        if ex.returncode == 0:
            # Use first matching line only, and keep everything after
            # the first '=' (the directory name may itself include '=')
            first_line = output.splitlines()[0]
            directory = first_line.split("=", 1)[1].replace('"', "").strip()
            clogger.debug("Looking at dir " + directory)
            if os.path.exists(directory):
                for fil in os.listdir(directory):
                    fileperiod = periodOfEmFile(fil, realm, f)
                    if fileperiod and period.intersects(fileperiod):
                        if fileHasVar(directory + "/" + fil, variable):
                            rep.append(directory + "/" + fil)
            else:
                clogger.error(
                    "Directory %s does not exist for simulation %s, realm %s "
                    "and frequency %s" % (directory, simulation, realm, f))
        else:
            clogger.info("No archive location found for " + simulation +
                         " for realm " + realm + " with: " + repr(command))
    return rep
Example #2
0
def selectEmFiles(**kwargs) :
    """
    Return the list of EM data files matching the provided attributes.

    For each realm, the data directory is read from the first line matching
    ``^export EM_DIRECTORY_<realm><freq>=`` in file
    ``$EM_HOME/expe_<simulation>`` (searched using ``grep``). A file of that
    directory is selected when its period, as returned by ``periodOfEmFile``,
    intersects ``period`` and when ``fileHasVar`` tells that it includes
    ``variable``.

    Keyword arguments (all mandatory; a missing one raises ``KeyError``):

    - ``simulation`` : name of the simulation, used in the ``expe_`` file name
    - ``frequency`` : data frequency; "mon" and "3h" are translated to the
      suffixes "" and "_3h", any other value is used as is
    - ``variable`` : name of the variable which must be present in the files
    - ``period`` : object with an ``intersects`` method
    - ``realm`` : a single realm letter, or "*" for "A", "L", "O" and "I"

    Returns the list of selected file paths; it is empty, and an error is
    logged, when environment variable EM_HOME is not set.
    """
    # For A and L : mon, day1, day2, 6hLev, 6hPlev, 3h
    simulation=kwargs['simulation']
    frequency=kwargs['frequency']
    variable=kwargs['variable']
    period=kwargs['period']
    realm=kwargs['realm']
    #
    freqs={ "mon" : "" , "3h" : "_3h"}
    f=freqs.get(frequency,frequency)
    rep=[]
    #
    # Without EM_HOME there is no file to search; report it explicitly
    # rather than failing inside os.path.expanduser
    em_home=os.getenv("EM_HOME")
    if not em_home :
        clogger.error("Environment variable EM_HOME is not set; cannot "
                      "locate data for simulation "+simulation)
        return rep
    expe_file=os.path.expanduser(em_home)+"/expe_"+simulation
    # Must look for all realms, here identified by a single letter
    if realm=="*" : lrealm= ["A", "L", "O", "I" ]
    else: lrealm=[ realm ]
    for realm in lrealm :
        clogger.debug("Looking for realm "+realm)
        # Use EM data for finding data dir
        freq_for_em=f
        if realm == 'I' : freq_for_em=""  # This is a special case ...
        command=["grep", "^export EM_DIRECTORY_"+realm+freq_for_em+"=", expe_file ]
        try :
            # universal_newlines=True : get text rather than bytes from pipes
            ex = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                  universal_newlines=True)
        except (OSError, ValueError) :
            # repr() replaces the backquote syntax, which Python 3 rejects
            clogger.error("Issue getting archive_location for "+
                          simulation+" for realm "+realm+" with: "+repr(command))
            break
        # communicate() reads both pipes until the process ends, which
        # cannot block on a full pipe buffer as wait() + read() can
        output=ex.communicate()[0]
        if ex.returncode==0 :
            # Use first matching line only, and keep everything after
            # the first '=' (the directory name may itself include '=')
            first_line=output.splitlines()[0]
            directory=first_line.split("=",1)[1].replace('"',"").strip()
            clogger.debug("Looking at dir "+directory)
            if os.path.exists(directory) :
                for fil in os.listdir(directory) :
                    fileperiod=periodOfEmFile(fil,realm,f)
                    if fileperiod and period.intersects(fileperiod) :
                        if fileHasVar(directory+"/"+fil,variable) :
                            rep.append(directory+"/"+fil)
            else : clogger.error("Directory %s does not exist for EM simulation %s, realm %s "
                                 "and frequency %s"%(directory,simulation,realm,f))
        else :
            clogger.info("No archive location found for "+
                          simulation+" for realm "+realm+" with: "+repr(command))
    return rep
Example #3
0
def selectExampleFiles(urls,**kwargs) :
    """
    Return the files found under ``<url>/A`` and ``<url>/L``, for each url
    in ``urls``, whose period (as returned by ``periodOfEmFile`` for
    frequency 'mon') intersects ``kwargs['period']`` and which include
    ``kwargs['variable']`` according to ``fileHasVar``.

    The result is an empty list unless ``kwargs['frequency']`` is "monthly".
    """
    selected=[]
    if kwargs['frequency'] != "monthly" :
        return selected
    for root in urls :
        for realm in ("A","L") :
            folder=root+"/"+realm
            clogger.debug("Looking at dir "+folder)
            if not os.path.exists(folder) :
                continue
            for name in os.listdir(folder) :
                clogger.debug("Looking at file "+name)
                span=periodOfEmFile(name,realm,'mon')
                # Skip files without a period, or out of requested period
                if not span or not span.intersects(kwargs['period']) :
                    continue
                path=folder+"/"+name
                if fileHasVar(path,kwargs['variable']) :
                    selected.append(path)
    return selected
Example #4
0
def selectExampleFiles(urls,**kwargs) :
    """
    Select example data files for a monthly frequency.

    For each url, sub-directories "A" and "L" are listed; a file is kept
    when ``periodOfEmFile`` (called with frequency 'mon') returns a period
    which intersects ``kwargs['period']`` and when ``fileHasVar`` tells that
    it includes ``kwargs['variable']``.

    For any ``kwargs['frequency']`` other than "monthly", nothing is
    searched and an empty list is returned.
    """
    found=[]
    wants_monthly=(kwargs['frequency'] == "monthly")
    if wants_monthly :
        for url in urls :
            for realm in ["A","L"] :
                realm_dir=url+"/"+realm
                clogger.debug("Looking at dir "+realm_dir)
                if os.path.exists(realm_dir) :
                    for entry in os.listdir(realm_dir) :
                        clogger.debug("Looking at file "+entry)
                        covered=periodOfEmFile(entry,realm,'mon')
                        if not covered :
                            continue
                        if not covered.intersects(kwargs['period']) :
                            continue
                        full_name=realm_dir+"/"+entry
                        if fileHasVar(full_name,kwargs['variable']) :
                            found.append(full_name)
    return found
Example #5
0
def selectGenericFiles(urls, return_wildcards=None,merge_periods_on=None,**kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned 
    by this function is composed of files which :

    - match the patterns in ``urls`` once these patterns are instantiated by 
      the values in kwargs, and 

    - contain the ``variable`` provided in kwargs

    - match the ``period`` provided in kwargs
    
    In the pattern strings, no keyword is mandatory. However, for remote files,
    filename pattern must include ${varname}, which is instantiated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this is  
    for the sake of efficiency (please complain if inadequate)
   
    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*${PERIOD}*.nc)']
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:
    
    - ${variable} : use it if the files are split by variable and 
      filenames do include the variable name, as this speed up the search

    - ${PERIOD} : use it for indicating the period covered by each file, if this 
      is applicable in the file naming; this period can appear in filenames as 
      YYYY, YYYYMM, YYYYMMDD, YYYYMMDDHHMM, either once only, or twice with 
      separator ='-' or '_'

    - wildcards '?' and '*' for matching respectively one and any number of characters

    Arguments :

    - ``urls`` : list of patterns; they are tried in turn, and the search
      stops at the first one which provides at least one file

    - ``return_wildcards`` : if not None, a dict which is filled, for each
      facet provided with a wildcard, with the value(s) found in filenames
      (for facet ``period`` : with a dict of sorted periods lists)

    - ``merge_periods_on`` : name of the facet used as a key for grouping
      the periods found (None : all periods are grouped under key None)

    - ``kwargs`` : facets values; ``period`` and ``variable`` are mandatory

    Returns the list of selected files (remote ones prefixed by their remote
    prefix). Raises ``classes.Climaf_Error`` when no period can be found in
    a filename which should include one, or when a remote pattern does not
    allow to identify the variable.
    """
    def store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards, merge_periods_on=None, 
                                    fperiod=None,periods=None,periods_dict=None):
        """
        Record in ``wildcards`` the facet values which file name ``f``
        provides, according to ``facets_regexp``, for those facets of
        ``kwargs`` which value includes a wildcard. If both ``fperiod``
        and ``periods`` are provided, also append ``fperiod`` to ``periods``
        and add it to ``periods_dict`` under the key ruled by
        ``merge_periods_on``.
        """
        if fperiod is not None and periods is not None :
            clogger.debug('Adding period %s'%fperiod)
            periods.append(fperiod)
        #
        # Matches depend neither on the facet nor on its value : compute once
        matches=list(re.finditer(facets_regexp,f))
        for kw in kwargs :
            for oc in matches :
                # Facets which are not in the pattern have no group in regexp
                try : facet_value=oc.group(kw)
                except IndexError : continue
                if isinstance(kwargs[kw],str) and ("*" in kwargs[kw] or "?" in kwargs[kw] ):
                    if facet_value is not None :
                        wildcards.setdefault(kw,set()).add(facet_value)
                        clogger.debug("Discover %s=%s for file=%s"%(kw,facet_value,f))
                    else :
                        clogger.debug("Logic issue for kw=%s and file=%s"%(kw,f))
                    #
                    if fperiod is not None and periods is not None :
                        if merge_periods_on is None : key=None
                        elif kw == merge_periods_on : key=facet_value
                        else : continue
                        periods_dict.setdefault(key,set()).add(fperiod)

    rep=[]
    #
    periods=None # a list of periods available
    periods_dict=dict()
    # Must exist even if ``urls`` is empty, for the final loop on facets
    wildcards=dict()
    #
    period=kwargs['period']
    if period == "*" :
        periods=[] # List of all periods
    elif isinstance(period,str) : period=init_period(period)
    #
    variable=kwargs['variable']
    altvar=kwargs.get('filenameVar',variable)
    #
    # dicts of date patterns, for globbing and for regexp
    #
    digit="[0-9]"
    date_glob_patt={ "${PERIOD}" : "*" } 
    # an ordered list of dates keywords (sorted() works on any Python version,
    # while sorting the result of keys() in place needs Python 2)
    date_keywords=sorted(date_glob_patt,reverse=True)
    #
    annee="%s{4}"%digit
    mois="(01|02|03|04|05|06|07|08|09|10|11|12)"
    jour="([0-3][0-9])"
    heure="(00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23)"
    minutes="[0-5][0-9]"
    date="%s(%s(%s(%s(%s)?)?)?)?"%(annee,mois,jour,heure,minutes)
    rperiod="(?P<period>(?P<start>%s)([_-](?P<end>%s))?)"%(date,date)
    date_regexp_patt={ "${PERIOD}" : rperiod } 
    # an ordered list of dates regexp keywords
    date_regexp_keywords=sorted(date_regexp_patt,reverse=True)
    #
    def matches_a_date(path) :
        """ Tell whether ``path`` matches one of the date regexps """
        return any(re.search(date_regexp_patt[k],path) for k in date_keywords)
    #
    for l in urls :
        # Instantiate keywords in pattern with attributes values
        remote_prefix=""
        if ":" in l :
            remote_prefix=':'.join(l.split(":")[0:-1])+':'
        basename=l.split(":")[-1] # This discard the remote_prefix if any
        basename=basename.replace("//","/")
        my_template=Template(basename)
        template=my_template.safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2=template
        for k in date_keywords : temp2=temp2.replace(k,date_glob_patt[k])
        # Do globbing with plain varname
        if remote_prefix : 
            lfiles=sorted(glob_remote_data(remote_prefix, temp2))
            clogger.debug("Remote globbing %d files for varname on %s : "%
                          (len(lfiles),remote_prefix+temp2))
        else: # local data
            lfiles=sorted(glob.glob(temp2))
            clogger.debug("Before regexp filtering : Globbing %d files for varname on %s : "%(len(lfiles),temp2))
            # Must filter with regexp, because * with glob is too inclusive
            lfiles=[ f for f in lfiles if matches_a_date(f) ]
            clogger.debug("Globbing %d files for varname on %s : "%(len(lfiles),temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if not lfiles and kwargs.get('filenameVar') :
            # Change value of facet 'variable'
            kwargs['variable']=kwargs['filenameVar']
            template=my_template.safe_substitute(**kwargs)
            temp2=template
            for k in date_keywords : temp2=temp2.replace(k,date_glob_patt[k])
            #
            # Do globbing with fileVarname
            if remote_prefix :
                lfiles=sorted(glob_remote_data(remote_prefix, temp2))
                clogger.debug("Remote globbing %d files for filenamevar on %s: "%
                              (len(lfiles),remote_prefix+temp2))
            else: # local data
                lfiles=sorted(glob.glob(temp2))
                # Must filter with regexp, because * with glob is too inclusive
                lfiles=[ f for f in lfiles if matches_a_date(f) ]
                clogger.debug("Globbing %d files for filenamevar on %s: "%(len(lfiles),temp2))
        #
        # For discovering values for those facets which are a wildcard,
        # construct a regexp with a group name for all facets (but period)
        alt_basename=basename.replace("?",".").replace("*",".*")
        alt_kwargs=kwargs.copy()
        for kw in kwargs :
            if isinstance(kwargs[kw],str) : # This excludes period attribute, which has a type
                alt_kwargs[kw]=kwargs[kw].replace("?",".").replace("*",".*")
                alt_basename=alt_basename.replace(r"${%s}"%kw,r"(?P<%s>%s)"%(kw,alt_kwargs[kw]),1)
        facets_regexp=Template(alt_basename).safe_substitute(**alt_kwargs)
        for k in date_regexp_keywords :
            # Only first occurrence gets the named groups, others match anything
            facets_regexp=facets_regexp.replace(k,date_regexp_patt[k],1)
            facets_regexp=facets_regexp.replace(k,".*")
        # Facet values are discovered anew for each url
        wildcards=dict()
        #
        # Construct regexp for extracting dates from filename
        date_regexp=None
        hasEnd=False
        template_toreg=template.replace("*",".*").replace("?",r".").replace("+",r"\+")
        for key in date_regexp_keywords :
            if key in template_toreg :
                date_regexp=template_toreg.replace(key,date_regexp_patt[key],1)
                # NOTE(review): a second occurrence of the keyword gets the
                # same named groups again, which module re should reject as
                # a group name redefinition - confirm whether a pattern may
                # include the keyword twice
                hasEnd=key in date_regexp
                if hasEnd :
                    date_regexp=date_regexp.replace(key,date_regexp_patt[key],1)
                break
        #
        for f in lfiles :
            # Name under which the file is returned (prefixed if remote)
            target=remote_prefix+f
            #
            # Extract file time period
            #
            fperiod=None
            if date_regexp :
                if "P<period>" in date_regexp :
                    tperiod=re.sub(date_regexp,r'\g<period>',f)
                    if tperiod==f :
                        raise classes.Climaf_Error("Cannot find a period in %s with regexp %s"%(f,date_regexp))
                    fperiod=init_period(tperiod)
                else:
                    start=re.sub(date_regexp,r'\1',f)
                    if start==f:
                        # Message used to refer to an undefined name (regexp0)
                        raise classes.Climaf_Error("Start period not found in %s using regexp %s"%(f,date_regexp))
                    if hasEnd :
                        end=re.sub(date_regexp,r'\2',f)
                        fperiod=init_period("%s-%s"%(start,end))
                    else :
                        fperiod=init_period(start)
            else :
                if kwargs.get('frequency') in ("fx","seasonnal","annual_cycle") :
                    # local data
                    if not remote_prefix and \
                       ( (basename.find("${variable}")>=0) or variable=='*' or \
                         fileHasVar(f,variable) or (variable != altvar and fileHasVar(f,altvar)) ) :
                        clogger.debug("adding fixed field :"+f)
                        store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on)
                        rep.append(f)
                    # remote data
                    elif remote_prefix :
                        if (basename.find("${variable}")>=0) or variable=='*' or \
                           (variable != altvar and (f.find(altvar)>=0) ):
                            clogger.debug("adding fixed field :"+remote_prefix+f)
                            store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on)
                            rep.append(remote_prefix+f)
                        else:
                            # Parentheses are needed : % binds tighter than +
                            raise classes.Climaf_Error(
                                ("For remote files, filename pattern (%s) should include ${varname} "
                                 "(which is instanciated by variable name or filenameVar)")%f)
                else :
                    clogger.info("Cannot yet filter files re. time using only file content.")
                    store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on)
                    rep.append(f)
            
            #
            # If file period matches requested period, check similarly for variable
            #
            if (fperiod and ( periods is not None or period.intersects(fperiod) )) \
               or not date_regexp :
                #
                clogger.debug('Period is OK - Considering variable filtering on %s and %s for %s'%(variable,altvar,f)) 
                # Filter against variable 
                if (l.find("${variable}")>=0):
                    # The file may have been selected above (case without
                    # date in pattern) : do not return it twice
                    if target not in rep :
                        clogger.debug('appending %s based on variable in filename'%f)
                        store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on,
                                                    fperiod,periods,periods_dict)
                        rep.append(target)
                    continue    
                if f not in rep and target not in rep :
                    # local data
                    if not remote_prefix and \
                        (variable=='*' or "," in variable or fileHasVar(f,variable) or \
                        (altvar != variable and fileHasVar(f,altvar))) :
                        # Should check time period in the file if not date_regexp
                        clogger.debug('appending %s based on multi-var or var exists in file '%f)
                        store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on,
                                                    fperiod,periods,periods_dict)
                        rep.append(f)
                        continue
                    # remote data
                    elif remote_prefix  : 
                        if variable=='*' or "," in variable or \
                            (variable != altvar and (f.find(altvar)>=0) ):
                            # Should check time period in the file if not date_regexp
                            clogger.debug('appending %s based on multi-var or altvar '%(remote_prefix+f))
                            store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards, merge_periods_on,
                                                        fperiod,periods,periods_dict)
                            rep.append(target)
                            continue
                        else:
                            mess="For remote files, filename pattern (%s) should include"%(remote_prefix+f)
                            mess+=" ${varname} (which is instanciated by variable name or filenameVar)"
                            raise classes.Climaf_Error(mess)
            else:
                if not fperiod :
                    clogger.debug('not appending %s because period is None '%f)
                elif not period.intersects(fperiod) :
                    clogger.debug('not appending %s because period doesn t intersect %s'%(f,period))
                else:
                    clogger.debug('not appending %s for some other reason'%f)

        # Break on first url with any matching data
        if len(rep)>0 :
            clogger.debug('url %s does match for '%l + repr(kwargs))
            break

    #  For wildcard facets, discover facet values + checks
    for facet in wildcards:
        s=wildcards[facet]
        if return_wildcards is not None :
            if facet=="period" :
                for val in periods_dict : 
                    periods_dict[val]=sort_periods_list(list(periods_dict[val]))
                clogger.info("Attribute period='*' has values %s"%(periods_dict))
                return_wildcards["period"]=periods_dict
            else:
                if len(s) == 1 :
                    s=s.pop()
                    clogger.info("Attribute %s='%s' has matching value '%s'"%(facet,kwargs[facet],s))
                    return_wildcards[facet]=s
                else:
                    # Use a name of its own : ``rep`` is the list of files
                    # to return and must not be replaced by facet values
                    values=sorted(s)
                    return_wildcards[facet]=values
                    message="Attribute %s='%s' has multiple values : %s"%(facet,kwargs[facet],list(s))
                    if return_wildcards : clogger.info(message)
                    else: clogger.error(message)
        else:
            clogger.debug("return_wildcards is None")
    return rep
Example #6
0
def selectGenericFiles(urls, **kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned 
    by this function is composed of files which :

    - match the patterns in ``urls`` once these patterns are instantiated by 
      the values in kwargs, and 

    - contain the ``variable`` provided in kwargs

    - match the ``period`` provided in kwargs
    
    In the pattern strings, no keyword is mandatory. However, for remote files,
    filename pattern must include ${varname}, which is instantiated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this is  
    for the sake of efficiency (please complain if inadequate)
   
    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc)']
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:
    
    - ${variable} : use it if the files are split by variable and 
      filenames do include the variable name, as this speed up the search

    - YYYY, YYYYMM, YYYYMMDD : use it for indicating the start date of
      the period covered by each file, if this is applicable in the
      file naming; use a second time for end date, if applicable
      (otherwise the assumption is that the whole year -resp. month or
      day- is included in the file)

    - wildcards '?' and '*' for matching respectively one and any number of characters

    Arguments :

    - ``urls`` : list of patterns; they are tried in turn, and the search
      stops at the first one which provides at least one file

    - ``kwargs`` : facets values; ``period`` and ``variable`` are mandatory

    Returns the list of selected files (remote ones prefixed by their remote
    prefix). Raises ``Climaf_Data_Error`` when no start date can be found in
    a filename which should include one, or when a remote pattern does not
    allow to identify the variable.
    """
    rep = []
    period = kwargs['period']
    if isinstance(period, str):
        period = init_period(period)
    variable = kwargs['variable']
    altvar = kwargs.get('filenameVar', variable)
    # a dict and an ordered list of date globbing patterns
    # (sorted() works on any Python version, while sorting the result
    # of keys() in place needs Python 2)
    dt = dict(YYYY="????",
              YYYYMM="??????",
              YYYYMMDD="????????",
              YYYYMMDDHH="??????????")
    lkeys = sorted(dt, reverse=True)
    # a dict and an ordered list for matching dates
    dr = dict(YYYY="([0-9]{4})",
              YYYYMM="([0-9]{6})",
              YYYYMMDD="([0-9]{8})",
              YYYYMMDDHH="([0-9]{10})")
    rkeys = sorted(dr, reverse=True)
    #
    for l in urls:
        # Instantiate keywords in pattern with attributes values
        if ":" in l:  # remote data
            remote_prefix = ':'.join(l.split(":")[0:-1]) + ':'
            pattern = l.split(":")[-1]
        else:  # local data
            remote_prefix = ""
            pattern = l
        template = Template(pattern).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in lkeys:
            temp2 = temp2.replace(k, dt[k])
        if remote_prefix:
            lfiles = sorted(glob_remote_data(remote_prefix, temp2))
            clogger.debug("Remote globbing %d files for varname on %s : " %
                          (len(lfiles), remote_prefix + temp2))
        else:  # local data
            lfiles = sorted(glob.glob(temp2))
            clogger.debug("Globbing %d files for varname on %s : " %
                          (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if not lfiles and kwargs.get('filenameVar'):
            # Change value of facet 'variable'
            kwargs['variable'] = kwargs['filenameVar']
            template = Template(pattern).safe_substitute(**kwargs)
            temp2 = template
            for k in lkeys:
                temp2 = temp2.replace(k, dt[k])
            #
            if remote_prefix:
                lfiles = sorted(glob_remote_data(remote_prefix, temp2))
                clogger.debug("Globbing %d files for filenamevar on %s: " %
                              (len(lfiles), remote_prefix + temp2))
            else:  # local data
                lfiles = sorted(glob.glob(temp2))
                clogger.debug("Globbing %d files for filenamevar on %s: " %
                              (len(lfiles), temp2))
        #
        # Construct regexp for extracting dates from filename
        regexp = None
        hasEnd = False
        for key in rkeys:
            if key in template:
                regexp = template.replace(key, dr[key], 1)
                # A second occurrence of the keyword stands for the end date
                hasEnd = key in regexp
                if hasEnd:
                    regexp = regexp.replace(key, dr[key], 1)
                break
        # Translate globbing wildcards to regexp ones, once for all files
        regexp0 = None
        if regexp:
            regexp0 = regexp.replace("*", ".*").replace("?", r".")
        #
        for f in lfiles:
            # Name under which the file is returned (prefixed if remote)
            target = remote_prefix + f
            #
            # Analyze file time period
            fperiod = None
            if regexp:
                start = re.sub(regexp0, r'\1', f)
                if start == f:
                    raise Climaf_Data_Error("Start period not found")  #?
                if hasEnd:
                    end = re.sub(regexp0, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
            else:
                if kwargs.get('frequency') in ("fx", "seasonnal",
                                               "annual_cycle"):
                    # local data (the test used to be inverted, which sent
                    # remote files to the checks relevant for local ones)
                    if not remote_prefix and \
                       ( (l.find("${variable}")>=0) or variable=='*' or \
                         fileHasVar(f,variable) or (variable != altvar and fileHasVar(f,altvar)) ) :
                        clogger.debug("adding fixed field :" + f)
                        rep.append(f)
                    # remote data
                    elif remote_prefix:
                        if (pattern.find("${variable}")>=0) or variable=='*' or \
                           (variable != altvar and (f.find(altvar)>=0) ):
                            clogger.debug("adding fixed field :" +
                                          remote_prefix + f)
                            rep.append(remote_prefix + f)
                        else:
                            raise Climaf_Data_Error(
                                "For remote files, filename pattern (%s) should include ${varname} (which is instanciated by variable name or filenameVar)"
                                % f)
                else:
                    clogger.info(
                        "Cannot yet filter files re. time using only file content."
                    )
                    rep.append(f)

            # Filter file time period against required period
            if (fperiod and period.intersects(fperiod)) or not regexp:
                clogger.debug(
                    'Period is OK - Considering variable filtering on %s and %s for %s'
                    % (variable, altvar, f))
                # Filter against variable
                if (l.find("${variable}") >= 0):
                    # The file may have been selected above (case without
                    # date in pattern) : do not return it twice
                    if target not in rep:
                        clogger.debug(
                            'appending %s based on variable in filename' % f)
                        rep.append(target)
                    continue
                if f not in rep and target not in rep:
                    # local data
                    if not remote_prefix and \
                        (variable=='*' or "," in variable or fileHasVar(f,variable) or \
                        (altvar != variable and fileHasVar(f,altvar))) :
                        # Should check time period in the file if not regexp
                        clogger.debug(
                            'appending %s based on multi-var or var exists in file '
                            % f)
                        rep.append(f)
                        continue
                    # remote data
                    elif remote_prefix:
                        if variable=='*' or "," in variable or \
                            (variable != altvar and (f.find(altvar)>=0) ):
                            # Should check time period in the file if not regexp
                            clogger.debug(
                                'appending %s based on multi-var or altvar ' %
                                (remote_prefix + f))
                            rep.append(target)
                            continue
                        else:
                            mess = "For remote files, filename pattern (%s) should include" % (
                                remote_prefix + f)
                            mess += " ${varname} (which is instanciated by variable name or filenameVar)"
                            raise Climaf_Data_Error(mess)
            else:
                if not fperiod:
                    clogger.debug('not appending %s because period is None ' %
                                  f)
                elif not period.intersects(fperiod):
                    clogger.debug(
                        'not appending %s because period doesn t intersect %s'
                        % (f, period))

        # Break on first url with any matching data
        if len(rep) > 0:
            # repr() replaces the backquote syntax, which Python 3 rejects
            clogger.debug('url %s does match for ' % l + repr(kwargs))
            break
    return rep
Example #7
0
def selectGenericFiles(urls, **kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned
    by this function is composed of files which :

    - match the patterns in ``urls`` once these patterns are instantiated by
      the values in kwargs, and

    - contain the ``variable`` provided in kwargs

    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory

    Example :

    >>> selectGenericFiles(project='my_project', model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc'])
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and
      filenames do include the variable name, as this speed up the search

    - YYYY, YYYYMM, YYYYMMDD : use it for indicating the start date of
      the period covered by each file, if this is applicable in the
      file naming; use a second time for end date, if applicable
      (otherwise the assumption is that the whole year -resp. month or
      day- is included in the file

    - wildcards '?' and '*' for matching respectively one and any number of characters

    Keyword arguments read by this function : ``period`` (mandatory; a string
    is converted with init_period), ``variable`` (mandatory), ``filenameVar``
    (optional; when set, it replaces ``variable`` for instantiating the
    patterns, and patterns without ``${variable}`` are skipped) and
    ``frequency`` (optional; the value "fx" selects fixed fields).

    Returns the list of matching file names; each file selected by a given
    pattern is listed once for that pattern.
    """
    rep = []
    period = kwargs['period']
    if isinstance(period, str):
        period = init_period(period)
    # Keep the true variable name for checking file content, even if the
    # patterns get instantiated with filenameVar
    variable = kwargs['variable']
    mustHaveVariable = False
    if kwargs.get('filenameVar'):
        kwargs['variable'] = kwargs['filenameVar']
        mustHaveVariable = True
    #
    # Date keywords with their globbing and regexp counterparts. Keys are
    # processed longest first, otherwise 'YYYY' would be replaced inside
    # 'YYYYMM' and 'YYYYMMDD'
    date_globs = dict(YYYY="????", YYYYMM="??????", YYYYMMDD="????????")
    date_regexps = dict(YYYY="([0-9]{4})", YYYYMM="([0-9]{6})",
                        YYYYMMDD="([0-9]{8})")
    date_keys = sorted(date_globs, reverse=True)
    #
    for l in urls:
        var_in_pattern = l.find("${variable}") >= 0
        # There is no use to look for files which path is not specific
        # to the required variable when we know it should
        if mustHaveVariable and not var_in_pattern:
            continue
        #
        # Instantiate keywords in pattern with attributes values
        template = Template(l).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for key in date_keys:
            temp2 = temp2.replace(key, date_globs[key])
        clogger.debug("Globbing on : " + temp2)
        lfiles = glob.glob(temp2)
        #
        # Construct regexp for extracting dates from filenames; it depends
        # only on the pattern, so it is built once per pattern
        regexp = None
        hasEnd = False
        for key in date_keys:
            if template.find(key) >= 0:
                regexp = template.replace(key, date_regexps[key], 1)
                # A second occurrence of the keyword stands for the end date
                if regexp.find(key) >= 0:
                    hasEnd = True
                    regexp = regexp.replace(key, date_regexps[key], 1)
                break
        if regexp:
            # Turn the globbing wildcards into their regexp equivalent
            regexp = regexp.replace("*", ".*").replace("?", r".")
        #
        # Analyze all filenames
        for f in lfiles:
            if not regexp:
                # No date in the pattern : cannot filter on period
                if kwargs.get('frequency') == "fx":
                    if var_in_pattern or fileHasVar(f, variable):
                        clogger.debug("adding fixed field :" + f)
                        rep.append(f)
                else:
                    clogger.warning("Cannot yet filter files re. time using only file content. TBD")
                    rep.append(f)
                # The file has been dealt with; going on would list it twice
                continue
            #
            # Analyze file time period
            start = re.sub(regexp, r'\1', f)
            if hasEnd:
                end = re.sub(regexp, r'\2', f)
                fperiod = init_period("%s-%s" % (start, end))
            else:
                fperiod = init_period(start)
            #
            # Filter file time period against required period
            if fperiod and period.intersects(fperiod):
                # Filter against variable
                if var_in_pattern or fileHasVar(f, variable):
                    rep.append(f)
    return rep
Example #8
0
def zonal_mean_slice(model, variable, basin, season, ref=None, add_product_in_title=True, method='regrid_model_on_ref',
                     custom_plot_params={}, safe_mode=True, do_cfile=True, y='lin', ymin=None, plot_on_latitude=False, horizontal_regridding=True,
                      apply_period_manager=True):
    """
    Plot the zonal mean of ``variable`` for ``basin`` and ``season``, either
    for ``model`` alone or, when ``ref`` is provided, as the difference
    between ``model`` and ``ref``.

    Arguments :

    - model : dictionary describing the model dataset; a copy updated with
      ``variable`` is passed to ds()
    - variable, basin, season : variable name, basin name and season used for
      the climatology
    - ref : optional dictionary describing the reference dataset.
      NOTE: it is updated in place with key 'variable'
    - method : 'regrid_model_on_ref' or 'regrid_ref_on_model';
      'regrid_on_1deg' only prints that it is not yet available
    - custom_plot_params : forwarded to plot_params()
    - safe_mode : if True, failures of the regridding / difference steps are
      caught and a fallback is used; also forwarded to safe_mode_cfile_plot()
    - do_cfile : forwarded to safe_mode_cfile_plot()
    - y, ymin : set plot parameter 'y' and, if ymin is set, plot option
      'trYMinF'
    - apply_period_manager : if True, frequency_manager_for_diag() and
      get_period_manager() are applied to the model dictionary
    - add_product_in_title, plot_on_latitude, horizontal_regridding : not
      read by the body of this function

    Returns the result of safe_mode_cfile_plot() applied to the plot.
    """
    # -----------------------------------------------------------------------------------------------------------------------------
    # -- 1/ Zonal mean of the model:
    #        -> either the zonal-mean variable is already available (zoblabla)
    #        -> or it is computed from the basin masks defined in model:
    #              * path_mesh_mask gives the path to the files
    #              * mesh_masks is a dictionary pointing to the mask file of each basin (GLO, ATL, PAC, IND and ALLBAS)
    # -- Test whether per-basin zonal means are already among the available variables
    context = 'full_field'
    # -- If method=='regrid_model_on_ref', the model is regridded on the grid of the reference, and the
    #    basin masks of the reference, located in its directory (<basin>_mask.nc), are used
    #    This method favours the latitudinal structure of the section (=> equator/pole gradient)
    if method=='regrid_model_on_ref':
       # -- Apply the frequency and time manager (IGCM_OUT)
       wmodel=model.copy() ; wmodel.update(dict(variable=variable))
       if apply_period_manager:
          frequency_manager_for_diag(wmodel, diag='clim')
          get_period_manager(wmodel)
       # -- Get the model data
       model_dat = ds(**wmodel)
       # -- Compute the climatology on the model grid and mask the zeros
       clim_model = clim_average( mask(model_dat, miss=0.0), season)
       #
       # -- Get the reference
       if ref:
          # NOTE: this modifies the caller's ref dictionary in place, and
          # overrides any 'variable' it already holds
          ref.update(dict(variable=variable))
          # -- Get the reference data
          ref_dat = ds(**ref)
          #
          # -- Get the context => model_model or bias
          context = ('bias' if 'product' in ref_dat.kvp else 'model_model')
          #
          # -- Compute the climatology
          clim_ref = clim_average(ref_dat, season)
          #
          # -- Regrid the model on the obs
          if safe_mode:
             try:
                rgrd_clim_model = lonlatvert_interpolation( regrid(clim_model, clim_ref, option='remapdis'), clim_ref, horizontal_regridding=False )
             except:
                # In safe mode, any failure falls back on the un-regridded model climatology
                print '--> Error in lonlatvert_interpolation( regrid(clim_model, clim_ref, option="remapdis"), clim_ref, horizontal_regridding=False )'
                print '--> Set safe_mode=False to see the error'
                rgrd_clim_model = clim_model
          else:
             rgrd_clim_model = lonlatvert_interpolation( regrid(clim_model, clim_ref, option='remapdis'), clim_ref, horizontal_regridding=False )
             # Debugging output : files holding the intermediate fields
             print '----'
             print '----'
             print '----'
             print 'rgrd_clim_model = ',cfile(rgrd_clim_model)
             print 'clim_model = ', cfile(clim_model)
             print 'clim_ref = ', cfile(clim_ref)
             print '----'
             print '----'
             print '----'
          #
          # -- Get the reference mask
          if 'path_mesh_mask' in ref:
             mask_file = ref['path_mesh_mask'] + ref['mesh_masks'][basin]
          else:
             # Default : <basin>_mask.nc in the directory of the first reference file
             mask_file = os.path.dirname(str.split(ref_dat.baseFiles(),' ')[0])+'/'+basin.lower()+'_mask.nc'
          print '----'
          print '----'
          print '----'
          print '---> mask_file = ', mask_file
          print '----'
          print '----'
          print '----'
          mask_dat = fds( mask_file, variable='mask', period='fx')
          basin_mask = mask( mask_dat, miss=0.0)
          #
          # -- Apply the mask to the model and the ref
          masked_model = multiply(rgrd_clim_model, basin_mask)
          masked_ref   = multiply(clim_ref,   basin_mask)
          #
          # -- When ref has no 'product' key, both fields are regridded on a r360x180 grid
          if 'product' not in ref:
             masked_model = regridn(masked_model, cdogrid='r360x180', option='remapdis')
             masked_ref   = regridn(masked_ref, cdogrid='r360x180', option='remapdis')
          # -- Compute the zonal means
          ZM_MODEL = zonmean(masked_model)
          ZM_REF   = zonmean(masked_ref)
          #
          #print '==='
          #print '==='
          #print '==='
          #print '=== ZM_MODEL = ',cfile(ZM_MODEL)
          #print '=== ZM_REF = ',cfile(ZM_REF)
          #print '==='
          #print '==='
          #print '==='

          # -- Interpolate vertically and compute the difference
          if safe_mode:
             try:
                ZM_bias = diff_zonmean(ZM_MODEL, ZM_REF)
             except:
                # In safe mode, any failure falls back on a plain difference
                print '--> Error in diff_zonmean(ZM_MODEL, ZM_REF)'
                print '--> Set safe_mode=False to track the error'
                ZM_bias = minus(ZM_MODEL, ZM_REF)
          else:
             ZM_bias = diff_zonmean(ZM_MODEL, ZM_REF)
          # -- Compute the zonal mean for the basin using the obs masks
       else:
          print 'No reference (obs) provided in zonal_mean_slice for method regrid_model_on_obs'
          # -- Get the model mask (no reference here)
          if 'path_mesh_mask' in model:
             mask_file = model['path_mesh_mask'] + model['mesh_masks'][basin]
          else:
             # Default : <basin>_mask.nc in the directory of the first model file
             mask_file = os.path.dirname(str.split(model_dat.baseFiles(),' ')[0])+'/'+basin.lower()+'_mask.nc'
          print 'mask_file = ', mask_file
          mask_dat = fds( mask_file, variable='mask', period='fx')
          basin_mask = mask( mask_dat, miss=0.0)
          #
          # -- Apply the mask to the model
          masked_model = multiply(clim_model, basin_mask)
          #
          if 'product' not in model:
             masked_model = regridn(masked_model, cdogrid='r360x180', option='remapdis')
          # -- Compute the zonal means
          ZM_MODEL = zonmean(masked_model)
      #
    if method=='regrid_ref_on_model':   
       #
       # -> This method ... (comment left unfinished by the author)
       #if variable=='thetao': tmpzonmvar = 'zotem'+region.lower()
       #if variable=='so':     tmpzonmvar = 'zosal'+region.lower()
       # -- Apply the frequency and time manager (IGCM_OUT)
       #wmodel=model.copy() #; wmodel.update(dict(variable=tmpzonmvar))
       #wmodel.update(dict(variable=variable))
       #frequency_manager_for_diag(wmodel, diag='clim')
       #get_period_manager(wmodel)
       #model_dat = ds(**wmodel)  # -> check whether ds() finds a file matching the variable
       #if tmp.baseFiles():
       #   # --> Fix to add nav_lat to the file
       #   if not fileHasDim(cfile(tmp),'nav_lat'):
       #      zonmean_model = add_nav_lat(tmp, nav_lat_file=nav_lat_zovarbasin_file(grid=whichORCAGrid(cfile(tmp))),
       #                                  coordinates=build_coordinates_zovarbasin(cfile(tmp)))
       #   else:
       #      zonmean_model = tmp
       #   modvar_climato_zonmean_basin = mask(clim_average(zonmean_model, season), miss=0.0)
       #else:
       #
       # -- Apply the frequency and time manager (IGCM_OUT)
       wmodel=model.copy() ; wmodel.update(dict(variable=variable))
       if apply_period_manager:
          frequency_manager_for_diag(wmodel, diag='SE')
          get_period_manager(wmodel)
       model_dat = ds(**wmodel)
       # -- Climatology of the model, with CDO operator 'setctomiss,0' applied
       model_clim = ccdo(clim_average(model_dat,season), operator='setctomiss,0')
       # -- If the file has a variable called 'lev', apply rename_depth
       if fileHasVar(cfile(model_clim), 'lev'):
          model_clim_ok = rename_depth(model_clim)
       else:
          model_clim_ok = model_clim
       # 
       #   if method=='regrid_model_on_1deg_grid':
       #   # --> In this case, we regrid the model on the obs
       #maskfile = model['path_mesh_mask'] + model['mesh_masks'][basin]
       #wmask = ccdo(fds(maskfile, variable='tmask', period='fx'), operator='setctomiss,0')
       #modvar_climato_masked = multiply(model_clim_ok, wmask)
       #   modvar_rgrd = regridn(modvar_climato_masked, cdogrid='r360x180',option='remapdis')
       #   modvar_climato_zonmean_basin = zonmean(modvar_rgrd)
       #   #
       #else:
       # -- Must be called before ccdfzonalmean_bas (presumably it sets up the
       #    fixed files this operator needs - confirm against set_fixed_fields)
       set_fixed_fields('ccdfzonalmean_bas', basin, model)
       # compute the zonal mean for the chosen basin
       model_clim_zonmean_basin = ccdfzonalmean_bas(model_clim_ok, point_type='T', basin=str(basin).lower())
       # -- Add the latitudes here??
       #else:
       ZM_MODEL = model_clim_zonmean_basin
       print '--'
       print '--'
       print '--'
       print 'cfile(ZM_MODEL) = ',cfile(ZM_MODEL)
       print '--'
       print '--'
       print '--'
       #
       # -----------------------------------------------------------------------------------------------------------------------------
       # -- 2/ Zonal mean of the ref:
       # --    -> references are provided with basin masks; if the ref is a model,
       #          path_mesh_mask and mesh_masks (hence the basin mask files) can be retrieved
       if ref:
          # compute the climatology for the chosen season
          # NOTE: this modifies the caller's ref dictionary in place (only when
          # it has no 'variable' key, unlike in method 'regrid_model_on_ref')
          if 'variable' not in ref: ref.update(dict(variable=variable))
          ref_dat = ds(**ref)
          ref_clim = clim_average(ref_dat, season)
          # -- Check whether the ref is a model or an obs to set the appropriate context
          context = ('bias' if 'product' in ref_dat.kvp else 'model_model')
          # 1. If the context is 'model_model', check whether the variable is available as a zonal mean
          #       - if so, work directly with it
          #       - if not, get the basin masks
          # 2. If the context is 'bias', get the basin masks, which must be in the directory of the obs
          #    From the masks, compute the zonal means per basin
          #zovarbas_ref = ref.copy() ; zovarbas.update(dict(variable=tmpzonmvar))
          #tmpref = ds(**zovarbas_ref)  # -> check whether ds() finds a file matching the variable
          # -- If pre-computed zonal mean variables are available for both model and obs, use them
          # --> Ok when using WOA13-v2 as the reference
          #if tmpref.baseFiles() and tmp.baseFiles():
          #   ref_clim_zonmean_basin_interp = regridn(mask(clim_average(tmpref, season), miss=0.0), cdogrid='r1x180', option='remapdis')
          #   model_clim_zonmean_basin_interp = regridn(model_clim_zonmean_basin, cdogrid='r1x180', option='remapdis')
          #   ZM_OBS = zonmean_interpolation(ref_clim_zonmean_basin_interp, model_clim_zonmean_basin_interp)
          #   ZM_MODEL = model_clim_zonmean_basin_interp
          #else:
          #   #
          ##   ref_clim = mask(clim_average(ref_dat, season), miss=0.0)
          if fileHasVar(cfile(ref_clim), 'lev'):
             ref_clim_ok = rename_depth(ref_clim)
          else:
             ref_clim_ok = ref_clim
          print "cfile(ref_clim_ok) = ",cfile(ref_clim_ok)
          #
          # -- If 'ref' is another simulation and has mesh_masks, use them
          if context=='model_model' and 'mesh_masks' in ref:
             set_fixed_fields('ccdfzonalmean_bas', basin, ref)
             ref_clim_interp = ref_clim_ok
          else:
             # -> Otherwise, regrid 'obs' on the model, and use the basin masks
             # -> of the model for computing the zonal means
             #ref_clim_interp = regrid(ref_clim_ok, model_clim_ok, option='remapdis')
             ref_clim_interp = ccdo(ref_clim_ok, operator='remapdis,'+cfile(model_clim_ok))
          test = lonlatvert_interpolation(ref_clim_interp, model_clim_ok, horizontal_regridding=False)
          ref_clim_zonmean_basin = ccdfzonalmean_bas(test, point_type='T', basin=str(basin).lower())
          # compute the zonal mean for the chosen basin
          ZM_OBS = zonmean_interpolation(ref_clim_zonmean_basin, model_clim_zonmean_basin, horizontal_regridding=False)
          ZM_bias = minus(ZM_MODEL, ZM_OBS)
        #
        # -- Now compute the difference (bias)
    if method=='regrid_on_1deg':
       print 'Not yet available : ',method

    # Plot
    #
    # -- Get the period for display in the plot: we build a tmp_period string
    # -- Check whether the period is described by clim_period, years or period (default)
    # -- and make a string with it
    # NOTE(review): wmodel (and ZM_MODEL / ZM_bias used below) are only assigned
    # in the two implemented method branches; with any other value of `method`
    # (including 'regrid_on_1deg') the next line fails on an unbound name
    tmp_period = build_period_str(wmodel)
    #
    # -- Title of the plot -> If 'customname' is in the dictionary of dat, it will be used
    # -- as the title. If not, it checks whether dat is a reference or a model simulation
    # -- and builds the title
    title = build_plot_title(wmodel, None)# add_product_in_title='') #add_product_in_title)
    #
    # -- Get the default plot parameters with the function 'plot_params'
    # -- We also update with a custom dictionary of params (custom_plot_params) if the user sets one
    p = plot_params(variable+'_zonmean', context, custom_plot_params=custom_plot_params)
    p.update(dict(y=y,
             contours=1,
             tiMainFontHeightF=0.023,tiMainFont="helvetica-bold",
             gsnStringFontHeightF=0.019,
             options="cnMissingValFillColor=gray|trYReverse=True|"+\
                     "vpHeightF=0.4|vpWidthF=0.8|"+\
                     "pmLabelBarWidthF=0.075|pmLabelBarOrthogonalPosF=0.01|lbLabelFontHeightF=0.012|"
             ))
    if ymin: p['options']=p['options']+'|trYMinF='+str(ymin)
    #
    # -- Set the left, center and right strings of the plot
    p.update(dict(gsnRightString = tmp_period,
                  gsnCenterString = variable+' '+method,
	          gsnLeftString  = basin))
    #
    # -- Plot the difference when a reference is provided, else the model zonal mean
    if ref:
        ZM = ZM_bias
    else:
        ZM = ZM_MODEL
    #
    plot_zonmean = plot(ZM, title=title, **p)
    # -- If the user doesn't want to do the cfile within plot_climato, set do_cfile=False
    # -- Otherwise we check if the plot has been done successfully.
    # -- If not, the user can set safe_mode=False and clog('debug') to debug.
    return safe_mode_cfile_plot(plot_zonmean, do_cfile, safe_mode)
Example #9
0
def selectGenericFiles(urls, **kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned
    by this function is composed of files which :

    - match the patterns in ``urls`` once these patterns are instantiated by
      the values in kwargs, and

    - contain the ``variable`` provided in kwargs

    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory

    Example :

    >>> selectGenericFiles(project='my_project', model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc'])
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and
      filenames do include the variable name, as this speed up the search

    - YYYY, YYYYMM, YYYYMMDD : use it for indicating the start date of
      the period covered by each file, if this is applicable in the
      file naming; use a second time for end date, if applicable
      (otherwise the assumption is that the whole year -resp. month or
      day- is included in the file

    - wildcards '?' and '*' for matching respectively one and any number of characters

    Keyword arguments read by this function : ``period`` (mandatory; a string
    is converted with init_period), ``variable`` (mandatory), ``filenameVar``
    (optional alternate variable name, tried in patterns when globbing with
    ``variable`` finds no file, and also looked for in file content) and
    ``frequency`` (optional; "fx", "seasonnal" and "annual_cycle" are handled
    as fields without time filtering).

    Returns the list of matching file names, each file being listed once.
    Raises Climaf_Data_Error if a date cannot be extracted from the name of a
    file matching a pattern which includes a date keyword.
    """
    rep = []
    listed = set()

    def append_once(filename):
        # A same file can qualify at various stages; list it only once
        if filename not in listed:
            listed.add(filename)
            rep.append(filename)

    period = kwargs['period']
    if isinstance(period, str):
        period = init_period(period)
    variable = kwargs['variable']
    altvar = kwargs.get('filenameVar', variable)
    # Date keywords with their globbing and regexp counterparts. Keys are
    # processed longest first, otherwise 'YYYY' would be replaced inside
    # 'YYYYMM' and 'YYYYMMDD'
    dt = dict(YYYY="????", YYYYMM="??????", YYYYMMDD="????????")
    dr = dict(YYYY="([0-9]{4})", YYYYMM="([0-9]{6})", YYYYMMDD="([0-9]{8})")
    date_keys = sorted(dt, reverse=True)
    fixed_frequencies = ("fx", "seasonnal", "annual_cycle")
    #
    for l in urls:
        var_in_pattern = l.find("${variable}") >= 0
        # Instantiate keywords in pattern with attributes values
        template = Template(l).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in date_keys:
            temp2 = temp2.replace(k, dt[k])
        lfiles = glob.glob(temp2)
        clogger.debug("Globbing %d files for varname on %s : " % (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles) == 0 and kwargs.get('filenameVar'):
            # NOTE(review): this override stays in kwargs, hence following
            # patterns are directly instantiated with filenameVar - confirm
            # this is intended
            kwargs['variable'] = kwargs['filenameVar']
            template = Template(l).safe_substitute(**kwargs)
            temp2 = template
            for k in date_keys:
                temp2 = temp2.replace(k, dt[k])
            #
            lfiles = glob.glob(temp2)
            clogger.debug("Globbing %d files for filenamevar on %s: " % (len(lfiles), temp2))

        # Construct regexp for extracting dates from filename
        regexp = None
        hasEnd = False
        for key in date_keys:
            if template.find(key) >= 0:
                regexp = template.replace(key, dr[key], 1)
                # A second occurrence of the keyword stands for the end date
                if regexp.find(key) >= 0:
                    hasEnd = True
                    regexp = regexp.replace(key, dr[key], 1)
                break
        regexp0 = None
        if regexp:
            # Turn the globbing wildcards into their regexp equivalent
            regexp0 = regexp.replace("*", ".*").replace("?", r".")
        #
        for f in lfiles:
            #
            # Analyze file time period
            fperiod = None
            if regexp:
                start = re.sub(regexp0, r'\1', f)
                # re.sub returns its input unchanged when nothing matches
                if start == f:
                    raise Climaf_Data_Error("Start period not found") #? LV
                if hasEnd:
                    end = re.sub(regexp0, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
            else:
                # No date in the pattern : cannot filter on period
                if kwargs.get('frequency') in fixed_frequencies:
                    if var_in_pattern or fileHasVar(f, variable) or fileHasVar(f, altvar):
                        clogger.debug("adding fixed field :" + f)
                        append_once(f)
                else:
                    clogger.warning("Cannot yet filter files re. time using only file content. TBD")
                    append_once(f)
            #
            # Filter file time period against required period
            if (fperiod and period.intersects(fperiod)) or not regexp:
                clogger.debug('Period is OK - Considering variable filtering on %s and %s for %s' % (variable, altvar, f))
                # Filter against variable
                if var_in_pattern:
                    clogger.debug('appending %s based on variable in filename' % f)
                    append_once(f)
                    continue
                if f not in listed and (fileHasVar(f, variable) or fileHasVar(f, altvar) or ("," in variable)):
                    # Should check time period in the file if not regexp
                    clogger.debug('appending %s based on multi-var or var exists in file ' % f)
                    append_once(f)
            else:
                if not fperiod:
                    clogger.debug('not appending %s because period is None ' % f)
                else:
                    if not period.intersects(fperiod):
                        clogger.debug('not appending %s because period doesn t intersect %s' % (f, period))

    return rep