Beispiel #1
0
def periodOfEmFile(filename,realm,freq):
    """
    Return the period covered by a file handled by EM, based on filename
    rules for EM.

    Args:
      filename: name (or path) of the file to analyze
      realm: 'A' or 'L' (filename must end with a 4-digit year followed
        by '.nc'), or 'O' or 'I' (filename must include '_1m_' or '_1d_'
        followed by two 8-digit dates)
      freq: requested frequency; '' and 'mon' stand for monthly data for
        all realms; realms O and I also accept 'monthly', and any value
        beginning with 'da' for daily data

    Returns:
      the result of init_period() for the period found in ``filename``,
      or None if the filename does not fit the naming rule for ``freq``

    Raises:
      Climaf_Data_Error: for an unexpected realm, or for a frequency
        which is not handled for the realm
    """
    if realm in ('A', 'L'):
        if freq not in ('mon', ''):
            raise Climaf_Data_Error("can yet handle only monthly frequency for realms A and L - TBD")
        # The dot of '.nc' is escaped and the pattern is anchored at the
        # end of the name : a name such as 'x1999.nc4' or 'x1999_nc' is
        # rejected, instead of providing a corrupted year
        found = re.match(r'^.*([0-9]{4})\.nc$', filename)
        if found is None:
            return None
        year = found.group(1)
        return init_period("%s01-%s12" % (year, year))
    elif realm in ('O', 'I'):
        if freq in ('monthly', 'mon', ''):
            altfreq = 'm'
        elif freq[0:2] == 'da':
            altfreq = 'd'
        else:
            raise Climaf_Data_Error("Can yet handle only monthly and daily frequency for realms O and I - TBD")
        # Both dates are extracted with a single match. Anchoring at the
        # end ensures that nothing which follows the last 'nc' can leak
        # in the dates (as happened when using re.sub)
        patt = r'^.*_1' + altfreq + r'_([0-9]{8})_*([0-9]{8})_.*nc$'
        found = re.match(patt, filename)
        if found is None:
            # File frequency (or file naming) does not fit freq
            return None
        beg, end = found.groups()
        return init_period("%s-%s" % (beg, end))
    else:
        raise Climaf_Data_Error("unexpected realm "+realm)
Beispiel #2
0
def periodOfEmFile(filename,realm,freq):
    """
    Return the period covered by a file handled by EM, based on filename
    rules for EM.

    Args:
      filename: name (or path) of the file to analyze
      realm: 'A' or 'L' (filename must end with a 4-digit year followed
        by '.nc'), or 'O' or 'I' (filename must include '_1m_' or '_1d_'
        followed by two 8-digit dates)
      freq: requested frequency; '' and 'mon' stand for monthly data for
        all realms; realms O and I also accept 'monthly', and any value
        beginning with 'da' for daily data

    Returns:
      the result of init_period() for the period found in ``filename``,
      or None if the filename does not fit the naming rule for ``freq``

    Raises:
      classes.Climaf_Error: for an unexpected realm, or for a frequency
        which is not handled for the realm
    """
    if realm == 'A' or realm == 'L':
        if freq != 'mon' and freq != '':
            raise classes.Climaf_Error("can yet handle only monthly frequency for realms A and L - TBD")
        # Literal dot and end anchor : names which merely look like
        # '<year>?nc...' do not provide a (corrupted) year anymore
        match = re.match(r'^.*([0-9]{4})\.nc$', filename)
        if match is None:
            return None
        return init_period("%s01-%s12" % (match.group(1), match.group(1)))
    elif realm == 'O' or realm == 'I':
        if freq in ('monthly', 'mon', ''):
            altfreq = 'm'
        elif freq[0:2] == 'da':
            altfreq = 'd'
        else:
            raise classes.Climaf_Error("Can yet handle only monthly and daily frequency for realms O and I - TBD")
        # One anchored match provides both dates; with re.sub, any text
        # following the last 'nc' was kept at the end of each date
        patt = r'^.*_1' + altfreq + r'_([0-9]{8})_*([0-9]{8}).*nc$'
        match = re.match(patt, filename)
        if match is None:
            # File frequency (or file naming) does not fit freq
            return None
        return init_period("%s-%s" % match.groups())
    else:
        raise classes.Climaf_Error("unexpected realm "+realm)
Beispiel #3
0
def selectCmip5DrsFiles(urls, **kwargs) :
    """
    Return the list of files which match ``kwargs`` in directories organized
    according to the CMIP5 DRS, under each of the roots listed in ``urls``.

    Example for path : CMIP5/output1/CNRM-CERFACS/CNRM-CM5/1pctCO2/mon/atmos/
         Amon/r1i1p1/v20110701/clivi/clivi_Amon_CNRM-CM5_1pctCO2_r1i1p1_185001-189912.nc

    The second path segment can be any string (allows for : output, output1,
    merge...), but if 'merge' exists, it is used alone.

    If version is 'last', the version used is directory 'last' if it exists
    among a number of version directories, otherwise the last directory in
    sorted order.

    ``kwargs`` must provide : project, model, simulation, frequency, variable,
    realm, table, period, experiment and version. Except for frequency 'fx',
    a file is selected only if the period found in its name intersects
    ``period``.
    """
    (project, model, simulation, frequency, variable, realm, table, period,
     experiment, version) = [ kwargs[key] for key in
                              ('project', 'model', 'simulation', 'frequency', 'variable',
                               'realm', 'table', 'period', 'experiment', 'version') ]
    # Translate the frequency to its DRS name, if it has a special one
    freqd = {'monthly':'mon'}.get(frequency, frequency)
    # Pattern capturing the period (YYYYMM-YYYYMM) at the end of filenames
    date_range = r'^.*([0-9]{4}[0-9]{2}-[0-9]{4}[0-9]{2}).nc$'
    # TBD : analyze ambiguity of variable among realms+tables
    selected = []
    for root in urls :
        top = root + "/" + project + "/merge"
        if not os.path.exists(top) :
            top = root + "/" + project + "/*"
        # The joker stands for the modelling center
        simulation_dirs = glob.glob("/".join([top, "*", model, experiment, freqd,
                                              realm, table, simulation]))
        for simulation_dir in simulation_dirs :
            available = sorted(os.listdir(simulation_dir))
            chosen = version
            if version == "last" and available :
                if len(available) > 1 and "last" in available :
                    chosen = "last"
                else :
                    # Assume that sorted order is OK for identifying last version
                    chosen = available[-1]
            candidates = glob.glob(simulation_dir + "/" + chosen + "/" + variable + "/*.nc")
            if freqd == 'fx' :
                # No period to check for fixed fields
                for ncfile in candidates :
                    clogger.debug("adding fixed field "+ ncfile)
                    selected.append(ncfile)
                continue
            for ncfile in candidates :
                fileperiod = init_period(re.sub(date_range, r'\1', ncfile))
                if fileperiod and period.intersects(fileperiod) :
                    selected.append(ncfile)

    return selected
Beispiel #4
0
def selectCmip5DrsFiles(urls, **kwargs) :
    """
    Return the list of files which match ``kwargs`` in directories organized
    according to the CMIP5 DRS, under each of the roots listed in ``urls``.

    Example for path : CMIP5/[output1/]CNRM-CERFACS/CNRM-CM5/1pctCO2/mon/atmos/
         Amon/r1i1p1/v20110701/clivi/clivi_Amon_CNRM-CM5_1pctCO2_r1i1p1_185001-189912.nc

    The second path segment is searched among 'merge', 'output', 'output?',
    'main' and no segment at all, in that order; the first one which provides
    a directory for the variable is used.

    If version is 'last', the version used is directory 'last' if it exists
    among a number of version directories, otherwise the last directory in
    sorted order.

    Args:
      urls: list of root directories
      kwargs: must provide project, model, simulation, frequency, variable,
        realm, table, period, experiment and version; except for frequency
        'fx', ``period`` must have a method ``intersects``

    Returns:
      the list of selected files; for frequencies other than 'fx', a file is
      selected only if the period found in its name intersects ``period``

    Raises:
      KeyError: if one of the expected keys is missing in ``kwargs``
    """
    project=kwargs['project']
    model=kwargs['model']
    simulation=kwargs['simulation']
    frequency=kwargs['frequency']
    variable=kwargs['variable']
    realm=kwargs['realm']
    table=kwargs['table']
    period=kwargs['period']
    experiment=kwargs['experiment']
    version=kwargs['version']
    #
    rep=[]
    # Translate the frequency to its DRS name, if it has a special one
    freqd={'monthly':'mon'}.get(frequency,frequency)
    #
    # Regexp capturing the period at the end of filenames, for each frequency
    period_regexps={ 'day': r'^.*([0-9]{8}-[0-9]{8})\.nc$',
                     'mon': r'^.*([0-9]{6}-[0-9]{6})\.nc$',
                     'yr' : r'^.*([0-9]{4}-[0-9]{4})\.nc$' }
    # For other frequencies (e.g. 6hr, 3hr), no regexp was defined at all,
    # which led to a crash on the first file. Fall back on any pair of dates
    # (the leading part is lazy, for not truncating the first date).
    # NOTE(review): assumes init_period() handles such dates - to be confirmed
    regex=period_regexps.get(freqd,r'^.*?([0-9]{4,}-[0-9]{4,})\.nc$')
    #
    # TBD : analyze ambiguity of variable among realms+tables
    for url in urls :
        totry=['merge/','output/','output?/','main/','']
        for p in totry :
            pattern1=url+"/"+project+"/"+p+"*/"+model # one * for modelling center
            joker_version="*"
            patternv=pattern1+"/"+experiment+"/"+freqd+"/"+realm+"/"+table+"/"+simulation+"/"+joker_version+"/"+variable
            if len(glob.glob(patternv))>0 : break
        # If no segment did match, pattern1 is the one with no second segment
        patternv=pattern1+"/"+experiment+"/"+freqd+"/"+realm+"/"+table+"/"+simulation
        # Get version directories list
        ldirs=glob.glob(patternv)
        clogger.debug("Globbing with "+patternv+ " gives:" +repr(ldirs))
        for repert in ldirs :
            lversions=sorted(os.listdir(repert))
            cversion=version # initial guess of the version to use
            if (version == "last") :
                if (len(lversions)== 1) : cversion=lversions[0]
                elif (len(lversions)> 1) :
                    if "last" in lversions : cversion="last"
                    else :
                        cversion=lversions[-1] # Assume that sorted order is OK
            lfiles=glob.glob(repert+"/"+cversion+"/"+variable+"/*.nc")
            for f in lfiles :
                if freqd != 'fx' :
                    # When the regexp does not match, the whole filename is
                    # provided to init_period(), as was the case before
                    fileperiod=init_period(re.sub(regex,r'\1',f))
                    if (fileperiod and period.intersects(fileperiod)) :
                        rep.append(f)
                else :
                    clogger.debug("adding fixed field "+ f)
                    rep.append(f)

    return rep
Beispiel #5
0
def selectGenericFiles(urls, return_wildcards=None,merge_periods_on=None,**kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned 
    by this function is composed of files which :

    - match the patterns in ``url`` once these patterns are instantiated by 
      the values in kwargs, and 

     - contain the ``variable`` provided in kwargs

     - match the ``period`` provided in kwargs
    
    In the pattern strings, no keyword is mandatory. However, for remote files,
    filename pattern must include ${varname}, which is instanciated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this is  
    for the sake of efficiency (please complain if inadequate)
   
    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*${PERIOD}*.nc'])
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:
    
    - ${variable} : use it if the files are split by variable and 
      filenames do include the variable name, as this speed up the search

    - ${PERIOD} : use it for indicating the period covered by each file, if this 
      is applicable in the file naming; this period can appear in filenames as 
      YYYY, YYYYMM, YYYYMMDD, YYYYMMDDHHMM, either once only, or twice with 
      separator ='-' or '_'

    - wildcards '?' and '*' for matching respectively one and any number of characters

    Args:
      urls: list of patterns; a pattern which includes ':' is handled as a
        remote one, the remote prefix being everything up to the last ':'.
        Only the first pattern which provides any file is used
      return_wildcards: if a dict is provided, it is filled, for each facet
        which value includes a wildcard, with the matching value (if it is
        unique) or with the sorted list of matching values; for facet
        'period', it receives a dict of sorted lists of periods
      merge_periods_on: name of the facet which values are used as keys when
        sorting out the periods found (None means : use None as single key)
      kwargs: facets values; must include 'period' (a period object, or a
        string, '*' meaning all periods) and 'variable'

    Returns:
      the list of selected files (with the remote prefix for remote files)

    Raises:
      classes.Climaf_Error: if no period can be found in a filename while the
        pattern includes ${PERIOD}, or if a remote pattern does not allow
        to filter on the variable
    """
    def store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards, merge_periods_on=None, 
                                    fperiod=None,periods=None,periods_dict=None):
        """
        Record what file ``f`` tells about the facets which are wildcards.

        Append ``fperiod`` to list ``periods`` (if both are provided). Then,
        for each facet in ``kwargs`` which value is a string including '*' or
        '?', add to set ``wildcards[facet]`` the value matched in ``f`` by the
        group of ``facets_regexp`` named after the facet. If ``fperiod`` and
        ``periods`` are provided, also add ``fperiod`` to set
        ``periods_dict[key]``, where key is None if ``merge_periods_on`` is
        None, and otherwise the value matched for facet ``merge_periods_on``.
        """
        if fperiod is not None and periods is not None :
            clogger.debug('Adding period %s'%fperiod)
            periods.append(fperiod)
        #
        for kw in kwargs :
            for oc in re.finditer(facets_regexp,f) :
                # group() raises IndexError if the regexp has no group named
                # after this facet
                try : facet_value=oc.group(kw)
                except IndexError : continue
                if isinstance(kwargs[kw],str) and ("*" in kwargs[kw] or "?" in kwargs[kw] ):
                    if facet_value is not None :
                        if kw not in wildcards : wildcards[kw]=set()
                        wildcards[kw].add(facet_value)
                        clogger.debug("Discover %s=%s for file=%s"%(kw,facet_value,f))
                    else :
                        clogger.debug("Logic issue for kw=%s and file=%s"%(kw,f))
                    #
                    if fperiod is not None and periods is not None :
                        if merge_periods_on is None : key=None
                        elif kw == merge_periods_on : key=facet_value
                        else :
                            # This facet is not the one used for sorting out periods
                            continue                        
                        if key not in periods_dict: periods_dict[key]=set()
                        periods_dict[key].add(fperiod)

    rep=[]
    #
    periods=None # a list of periods available
    periods_dict=dict()
    #
    period=kwargs['period']
    if period == "*" :
        periods=[] # List of all periods
    elif isinstance(period,str) : period=init_period(period)
    #
    variable=kwargs['variable']
    altvar=kwargs.get('filenameVar',variable)
    #
    # dicts of date patterns, for globbing and for regexp
    #
    digit="[0-9]"
    date_glob_patt={ "${PERIOD}" : "*" } 
    # an ordered list of dates keywords
    date_keywords=sorted(date_glob_patt,reverse=True)
    #
    annee="%s{4}"%digit
    mois="(01|02|03|04|05|06|07|08|09|10|11|12)"
    jour="([0-3][0-9])"
    heure="(00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23)"
    minutes="[0-5][0-9]"
    date="%s(%s(%s(%s(%s)?)?)?)?"%(annee,mois,jour,heure,minutes)
    rperiod="(?P<period>(?P<start>%s)([_-](?P<end>%s))?)"%(date,date)
    date_regexp_patt={ "${PERIOD}" : rperiod } 
    # an ordered list of dates regexp keywords
    date_regexp_keywords=sorted(date_regexp_patt,reverse=True)
    #
    # Also defined before the loop, so that an empty list of urls is harmless
    wildcards=dict()
    #
    for l in urls :
        # Instantiate keywords in pattern with attributes values
        remote_prefix=""
        if ":" in l :
            remote_prefix=':'.join(l.split(":")[0:-1])+':'
        basename=l.split(":")[-1] # This discard the remote_prefix if any
        basename=basename.replace("//","/")
        my_template=Template(basename)
        template=my_template.safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2=template
        for k in date_keywords : temp2=temp2.replace(k,date_glob_patt[k])
        # Do globbing with plain varname
        if remote_prefix : 
            lfiles=sorted(glob_remote_data(remote_prefix, temp2))
            clogger.debug("Remote globbing %d files for varname on %s : "%\
                          (len(lfiles),remote_prefix+temp2))
        else: # local data
            lfiles=sorted(glob.glob(temp2))
            clogger.debug("Before regexp filtering : Globbing %d files for varname on %s : "%(len(lfiles),temp2))
            # Must filter with regexp, because * with glob is too inclusive.
            # A file is kept once, whatever the number of matching patterns
            lfiles=[ f for f in lfiles
                     if any(re.search(date_regexp_patt[k],f) for k in date_keywords) ]
            clogger.debug("Globbing %d files for varname on %s : "%(len(lfiles),temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles)==0 and "filenameVar" in kwargs and kwargs['filenameVar'] :
            # Change value of facet 'variable'
            kwargs['variable']=kwargs['filenameVar']
            template=my_template.safe_substitute(**kwargs)
            temp2=template
            for k in date_keywords : temp2=temp2.replace(k,date_glob_patt[k])
            #
            # Do globbing with fileVarname
            if remote_prefix : # 
                lfiles=sorted(glob_remote_data(remote_prefix, temp2))
                clogger.debug("Remote globbing %d files for filenamevar on %s: "%\
                              (len(lfiles),remote_prefix+temp2))
            else: # local data
                lfiles=sorted(glob.glob(temp2))
                # Must filter with regexp, because * with glob is too inclusive
                lfiles=[ f for f in lfiles
                         if any(re.search(date_regexp_patt[k],f) for k in date_keywords) ]
                clogger.debug("Globbing %d files for filenamevar on %s: "%(len(lfiles),temp2))
        #
        # For discovering values for those facets which are a wildcard,
        # construct a regexp with a group name for all facets (but period)
        alt_basename=basename.replace("?",".").replace("*",".*")
        alt_kwargs=kwargs.copy()
        for kw in kwargs :
            if isinstance(kwargs[kw],str) : # This excludes period attribute, which has a type
                alt_kwargs[kw]=kwargs[kw].replace("?",".").replace("*",".*")
                alt_basename=alt_basename.replace(r"${%s}"%kw,r"(?P<%s>%s)"%(kw,alt_kwargs[kw]),1)
        facets_regexp=Template(alt_basename).safe_substitute(**alt_kwargs)
        for k in date_regexp_keywords :
            # Only the first occurrence can bear the named groups
            facets_regexp=facets_regexp.replace(k,date_regexp_patt[k],1)
            facets_regexp=facets_regexp.replace(k,".*")
        wildcards=dict()
        #
        # Construct regexp for extracting dates from filename
        date_regexp=None
        hasEnd=False
        template_toreg=template.replace("*",".*").replace("?",r".").replace("+",r"\+")
        for key in date_regexp_keywords :
            start=template_toreg.find(key)
            if (start>=0 ) :
                date_regexp=template_toreg.replace(key,date_regexp_patt[key],1)
                hasEnd=False
                start=date_regexp.find(key)
                if (start >=0 ) :
                    hasEnd=True
                    date_regexp=date_regexp.replace(key,date_regexp_patt[key],1)
                break
        #
        for f in lfiles :
            #
            # Extract file time period
            #
            fperiod=None
            if date_regexp :
                if "P<period>" in date_regexp :
                    tperiod=re.sub(date_regexp,r'\g<period>',f)
                    if tperiod==f :
                        raise classes.Climaf_Error("Cannot find a period in %s with regexp %s"%(f,date_regexp))
                    fperiod=init_period(tperiod)
                else:
                    date_regexp0=date_regexp 
                    start=re.sub(date_regexp0,r'\1',f)
                    if start==f:
                        # NOTE(review): the rest of this function raises
                        # classes.Climaf_Error - check which one is intended
                        raise Climaf_Data_Error("Start period not found in %s using regexp %s"%(f,date_regexp0))
                    if hasEnd :
                        end=re.sub(date_regexp0,r'\2',f)
                        fperiod=init_period("%s-%s"%(start,end))
                    else :
                        fperiod=init_period(start)
                #
                # Filter file time period against required period
            else :
                if ( 'frequency' in kwargs and ((kwargs['frequency']=="fx") or \
                    kwargs['frequency']=="seasonnal" or kwargs['frequency']=="annual_cycle" )) :
                    # local data
                    if not remote_prefix and \
                       ( (basename.find("${variable}")>=0) or variable=='*' or \
                         fileHasVar(f,variable) or (variable != altvar and fileHasVar(f,altvar)) ) :
                        clogger.debug("adding fixed field :"+f)
                        store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on)
                        rep.append(f)
                    # remote data
                    elif remote_prefix :
                        if (basename.find("${variable}")>=0) or variable=='*' or \
                           (variable != altvar and (f.find(altvar)>=0) ):
                            clogger.debug("adding fixed field :"+remote_prefix+f)
                            store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on)
                            rep.append(remote_prefix+f)
                        else:
                            # Parentheses are needed for applying the format
                            # to the whole message
                            raise classes.Climaf_Error(
                                ("For remote files, filename pattern (%s) should include ${varname} "+\
                                 "(which is instanciated by variable name or filenameVar)")%f)
                else :
                    clogger.info("Cannot yet filter files re. time using only file content.")
                    store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on)
                    rep.append(f)
            
            #
            # If file period matches requested period, check similarly for variable
            #
            # NOTE(review): when the pattern has no date, a file already
            # appended just above can be appended a second time below
            # (if the pattern includes ${variable}) - to be confirmed
            if (fperiod and ( periods is not None or period.intersects(fperiod) )) \
               or not date_regexp :
                #
                clogger.debug('Period is OK - Considering variable filtering on %s and %s for %s'%(variable,altvar,f)) 
                # Filter against variable 
                if (l.find("${variable}")>=0):
                    clogger.debug('appending %s based on variable in filename'%f)
                    store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on,
                                                fperiod,periods,periods_dict)
                    rep.append(remote_prefix+f)
                    continue    
                if (f not in rep):
                    # local data
                    if not remote_prefix and \
                        (variable=='*' or "," in variable or fileHasVar(f,variable) or \
                        (altvar != variable and fileHasVar(f,altvar))) :
                        # Should check time period in the file if not date_regexp
                        clogger.debug('appending %s based on multi-var or var exists in file '%f)
                        store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards,merge_periods_on,
                                                    fperiod,periods,periods_dict)
                        rep.append(f)
                        continue
                    # remote data
                    elif remote_prefix  : 
                        if variable=='*' or "," in variable or \
                            (variable != altvar and (f.find(altvar)>=0) ):
                            # Should check time period in the file if not date_regexp
                            clogger.debug('appending %s based on multi-var or altvar '%(remote_prefix+f))
                            store_wildcard_facet_values(f,facets_regexp, kwargs, wildcards, merge_periods_on,
                                                        fperiod,periods,periods_dict)
                            rep.append(remote_prefix+f)
                            continue
                        else:
                            mess="For remote files, filename pattern (%s) should include"%(remote_prefix+f)
                            mess+=" ${varname} (which is instanciated by variable name or filenameVar)"
                            raise classes.Climaf_Error(mess)
            else:
                if not fperiod :
                    clogger.debug('not appending %s because period is None '%f)
                elif not period.intersects(fperiod) :
                    clogger.debug('not appending %s because period doesn t intersect %s'%(f,period))
                else:
                    clogger.debug('not appending %s for some other reason'%f)

        # Break on first url with any matching data
        if len(rep)>0 :
            clogger.debug('url %s does match for '%l + repr(kwargs))
            break

    #  For wildcard facets, discover facet values + checks
    for facet in wildcards:
        s=wildcards[facet]
        if return_wildcards is not None :
            if facet=="period" :
                for val in periods_dict : 
                    periods_dict[val]=sort_periods_list(list(periods_dict[val]))
                clogger.info("Attribute period='*' has values %s"%(periods_dict))
                return_wildcards["period"]=periods_dict
            else:
                if len(s) == 1 :
                    s=s.pop()
                    clogger.info("Attribute %s='%s' has matching value '%s'"%(facet,kwargs[facet],s))
                    return_wildcards[facet]=s
                else:
                    # Use a name of its own : 'rep' is the list of files to return
                    values=sorted(s)
                    return_wildcards[facet]=values
                    message="Attribute %s='%s' has multiple values : %s"%(facet,kwargs[facet],list(s))
                    if return_wildcards : clogger.info(message)
                    else: clogger.error(message)
        else:
            clogger.debug("return_wildcards is None")
    return rep
Beispiel #6
0
def ceval_script (scriptCall,deep,recurse_list=None):
    """ Actually applies a CliMAF-declared script on a script_call object 
    
    Prepare operands as files and build command from operands and parameters list
    Assumes that scripts are described in dictionary 'scripts'  by templates as
    documented in operators.cscript

    Args:
      scriptCall: the script call object to evaluate
      deep: passed as is to ceval() when evaluating the operands
      recurse_list: passed as is to ceval() when evaluating the operands;
        defaults to a new empty list for each call
    
    Returns:
      a CliMAF cache data filename (the one of the main output), or None
      if the script is declared with no output format

    Raises:
      Climaf_Driver_Error: if an operand has no value, if an input file does
        not exist, if an ensemble is provided to an input which does not
        accept it, if the script fails, or if an output cannot be registered

    Side effects: runs the script command with a shell, and writes the
    command and its output (stdout and stderr) in file 'last.out' in the
    current directory
    """
    if recurse_list is None :
        # A default value of [] would be shared among all calls
        recurse_list=[]
    script=operators.scripts[scriptCall.operator]
    template=Template(script.command)

    # Evaluate input data 
    dict_invalues=dict()
    for op in scriptCall.operands :
        inValue=ceval(op,userflags=scriptCall.flags,format='file',deep=deep,
                      recurse_list=recurse_list)
        if inValue is None or inValue == "" :
            raise Climaf_Driver_Error("When evaluating %s : value for %s is None"\
                                      %(scriptCall.script,repr(op)))
        dict_invalues[op]=inValue
    #
    # Replace input data placeholders with filenames
    subdict=dict()
    opscrs=""
    if 0 in script.inputs :
        label,multiple,serie=script.inputs[0]
        op=scriptCall.operands[0]
        infile=dict_invalues[op]
        if not all(map(os.path.exists,infile.split(" "))) :
            raise Climaf_Driver_Error("Internal error : some input file does not exist among %s:"%(infile))
        subdict[ label ]=infile
        subdict["var"]=varOf(op)
        if isinstance(op,classes.cdataset) and op.alias and scriptCall.flags.canAlias:
            filevar,scale,offset,units,filenameVar,missing=op.alias
            if ((varOf(op) != filevar) or scale != 1.0 or offset != 0.) :
                subdict["alias"]="%s,%s,%.4g,%.4g"%(varOf(op),filevar,scale,offset)
                subdict["var"]=filevar
            if units : subdict["units"]=units 
            if scriptCall.flags.canMissing and missing :
                subdict["missing"]=missing
        if isinstance(op,classes.cens) :
            if not multiple :
                raise Climaf_Driver_Error(
                    "Script %s 's input #%s cannot accept ensemble %s"\
                        %(scriptCall.script,0,repr(op)))
            # Labels are provided as a single string, with separator '$'
            subdict["labels"]="$".join(op.labels)
        per=timePeriod(op)
        if not per.fx and str(per) != "" and scriptCall.flags.canSelectTime:
            subdict["period"]=str(per)
            subdict["period_iso"]=per.iso()
        if scriptCall.flags.canSelectDomain :
            subdict["domain"]=domainOf(op)
    i=0
    for op in scriptCall.operands :
        opscrs += op.crs+" - "
        infile=dict_invalues[op]
        if not all(map(os.path.exists,infile.split(" "))) :
            raise Climaf_Driver_Error("Internal error : some input file does not exist among %s:"%(infile))
        i+=1
        if ( i> 1 or 1 in script.inputs) :
            label,multiple,serie=script.inputs[i]
            subdict[ label ]=infile
            # Provide the name of the variable in input file if script allows for
            subdict["var_%d"%i]=varOf(op)
            if isinstance(op,classes.cdataset) and op.alias :
                filevar,scale,offset,units,filenameVar,missing =op.alias
                if (varOf(op) != filevar) or (scale != 1.0) or (offset != 0.) :
                    subdict["alias_%d"%i]="%s %s %f %f"%(varOf(op),filevar,scale,offset)
                    subdict["var_%d"%i]=filevar
                if units : subdict["units_%d"%i]=units 
                if missing : subdict["missing_%d"%i]=missing
            # Provide period selection if script allows for.
            # The test on str(per) is the same as for the first operand
            per=timePeriod(op)
            if not per.fx and str(per) != "":
                subdict["period_%d"%i]=str(per)
                subdict["period_iso_%d"%i]=per.iso()
            subdict["domain_%d"%i]=domainOf(op)
    clogger.debug("subdict for operands is "+repr(subdict))
    # substitution is deffered after scriptcall parameters evaluation, which may
    # redefine e.g period
    #
    # Provide one cache filename for each output and instantiates the command accordingly
    if script.outputFormat is not None :
        # Compute a filename for each ouptut
        # Un-named main output
        main_output_filename=cache.generateUniqueFileName(scriptCall.crs,
                                                          format=script.outputFormat)
        subdict["out"]=main_output_filename
        subdict["out_"+scriptCall.variable]=main_output_filename
        # Named outputs
        for output in scriptCall.outputs:
            subdict["out_"+output]=cache.generateUniqueFileName(scriptCall.crs+"."+output,\
                                                         format=script.outputFormat)
    # Account for script call parameters
    for p in scriptCall.parameters : 
        subdict[p]=scriptCall.parameters[p]
        if p=="period" :
            subdict["period_iso"]=init_period(scriptCall.parameters[p]).iso()
    subdict["crs"]=opscrs.replace("'","")
    #
    # Combine CRS and possibly member_label to provide/complement title 
    if 'title' not in subdict :
        if 'member_label' in subdict :
            subdict["title"]=subdict['member_label']
        else:
            subdict["title"]=subdict["crs"]
    else: 
        if 'member_label' in subdict :
            subdict["title"]=subdict["title"]+" "+subdict['member_label']
            subdict.pop('member_label')
    #
    # Substitute all args
    template=template.safe_substitute(subdict)
    #
    # Allowing for some formal parameters to be missing in the actual call:
    #
    # Discard remaining substrings looking like :
    #  some_word='"${some_keyword}"'     , or:
    #  '"${some_keyword}"'
    template=re.sub(r'(\w*=)?(\'\")?\$\{\w*\}(\"\')?',r"",template)
    #
    # Discard remaining substrings looking like :
    #  some_word=${some_keyword}          , or
    #  ${some_keyword}
    template=re.sub(r"(\w*=)?\$\{\w*\}",r"",template)
    #
    # Launch script using command, and check termination 
    tim1=time.time()
    clogger.info("Launching command:"+template)
    #
    # NOTE(review): the command line is built from the call parameters and
    # is run by a shell; parameters values must be trusted
    command=subprocess.Popen(template, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                             shell=True, universal_newlines=True)
    # communicate() reads the output while waiting for the script to end;
    # waiting before reading could block for ever when the script writes
    # more than what the pipe can buffer
    command_std,_=command.communicate()
    #
    log_text="\n\nstdout and stderr of script call :\n\t "+template+"\n\n"+command_std
    with open('last.out', 'w') as logfile :
        logfile.write(log_text)
    
    if ( command.returncode == 0 ):
        if script.outputFormat is not None :
            # Tagging output files with their CliMAF Reference Syntax definition
            # Un-named main output
            ok = cache.register(main_output_filename,scriptCall.crs)
            # Named outputs
            for output in scriptCall.outputs:
                ok = ok and cache.register(subdict["out_"+output],\
                                           scriptCall.crs+"."+output)
            if ok : 
                duration=time.time() - tim1
                print("Done in %.1f s with script computation for %s "%\
                          (duration,repr(scriptCall)),file=sys.stderr)
                clogger.debug("Done in %.1f s with script computation for "
                              "%s (command was :%s )"%\
                                  (duration,repr(scriptCall),template))
                return main_output_filename
            else :
                raise Climaf_Driver_Error("Some output missing when executing "
                                          ": %s. \n See last.out"%template)
        else :
            clogger.debug("script %s has no output"%script.name)
            return None
    else:
        clogger.debug("Full script output:\n"+command_std)
        # Last ten lines of what was written to last.out
        last_lines="".join(log_text.splitlines(True)[-10:])
        clogger.error("Last lines of script output:\n"+last_lines)
        raise Climaf_Driver_Error("Script failure for : %s. More details either in file "
                                  "./last.out or by re-runing with clog(\"debug\")" %template)
Beispiel #7
0
def selectGenericFiles(urls, **kwargs):
    """
    Return the list of files matching a ``generic`` file organization.

    The files returned are those which :

    - match one of the patterns in ``urls`` once these patterns are
      instantiated by the values in kwargs, and

    - contain the ``variable`` provided in kwargs, and

    - match the ``period`` provided in kwargs

    Urls are tried in turn, and the search stops at the first url which
    yields at least one file. An url which includes a ':' is regarded as
    describing remote data : what precedes the last ':' is the remote
    prefix, which is prepended to the returned filenames.

    In the pattern strings, no keyword is mandatory. However, for remote files,
    filename pattern must include ${varname}, which is instanciated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this is
    for the sake of efficiency (please complain if inadequate)

    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc'])
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and
      filenames do include the variable name, as this speed up the search

    - YYYY, YYYYMM, YYYYMMDD, YYYYMMDDHH : use it for indicating the start
      date of the period covered by each file, if this is applicable in the
      file naming; use a second time for end date, if applicable
      (otherwise the assumption is that the whole year -resp. month or
      day- is included in the file)

    - wildcards '?' and '*' for matching respectively one and any number of characters

    Keyword arguments ``period`` (a string or a period object) and
    ``variable`` are mandatory; ``filenameVar`` and ``frequency`` are
    optional. Each selected file is listed only once.

    Raises Climaf_Data_Error if a date cannot be extracted from a filename
    while the pattern includes a date keyword, or if a remote file cannot be
    associated with the variable on the basis of its filename.
    """
    rep = []

    def add_file(path):
        # Successive filters below may select the same file : list it once
        if path not in rep:
            rep.append(path)

    period = kwargs['period']
    if isinstance(period, str):
        period = init_period(period)
    variable = kwargs['variable']
    altvar = kwargs.get('filenameVar', variable)
    # a dict and an ordered list of date globbing patterns; the longest
    # keyword comes first, otherwise YYYY would be replaced inside YYYYMM
    dt = dict(YYYY="????",
              YYYYMM="??????",
              YYYYMMDD="????????",
              YYYYMMDDHH="??????????")
    lkeys = sorted(dt, reverse=True)
    # a dict and an ordered list for matching dates
    dr = dict(YYYY="([0-9]{4})",
              YYYYMM="([0-9]{6})",
              YYYYMMDD="([0-9]{8})",
              YYYYMMDDHH="([0-9]{10})")
    rkeys = sorted(dr, reverse=True)
    #
    for l in urls:
        # Instantiate keywords in pattern with attributes values
        if re.findall(".*:.*", l):  # remote data
            remote_prefix = ':'.join(l.split(":")[0:-1]) + ':'
            template = Template(l.split(":")[-1]).safe_substitute(**kwargs)
        else:  # local data
            remote_prefix = ""
            template = Template(l).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in lkeys:
            temp2 = temp2.replace(k, dt[k])
        if remote_prefix:
            lfiles = sorted(glob_remote_data(remote_prefix, temp2))
            clogger.debug("Remote globbing %d files for varname on %s : " %
                          (len(lfiles), remote_prefix + temp2))
        else:  # local data
            lfiles = sorted(glob.glob(temp2))
            clogger.debug("Globbing %d files for varname on %s : " %
                          (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles) == 0 and "filenameVar" in kwargs and kwargs['filenameVar']:
            # Change value of facet 'variable' (this also holds for next urls)
            kwargs['variable'] = kwargs['filenameVar']
            if remote_prefix:  # remote data
                template = Template(l.split(":")[-1]).safe_substitute(**kwargs)
            else:  # local data
                template = Template(l).safe_substitute(**kwargs)
            temp2 = template
            for k in lkeys:
                temp2 = temp2.replace(k, dt[k])
            #
            if remote_prefix:
                lfiles = sorted(glob_remote_data(remote_prefix, temp2))
                clogger.debug("Globbing %d files for filenamevar on %s: " %
                              (len(lfiles), remote_prefix + temp2))
            else:  # local data
                lfiles = sorted(glob.glob(temp2))
                clogger.debug("Globbing %d files for filenamevar on %s: " %
                              (len(lfiles), temp2))
        #
        # Construct regexp for extracting dates from filename : the first
        # occurrence of the date keyword is the start date, a second one
        # (if any) is the end date
        regexp = None
        hasEnd = False
        for key in rkeys:
            if template.find(key) >= 0:
                regexp = template.replace(key, dr[key], 1)
                if regexp.find(key) >= 0:
                    hasEnd = True
                    regexp = regexp.replace(key, dr[key], 1)
                break
        #
        for f in lfiles:
            #
            # Analyze file time period
            fperiod = None
            if regexp:
                # Turn globbing wildcards to their regexp counterparts
                regexp0 = regexp.replace("*", ".*").replace("?", r".")
                start = re.sub(regexp0, r'\1', f)
                if start == f:
                    # re.sub returns its input unchanged when nothing matches
                    raise Climaf_Data_Error("Start period not found")  #?
                if hasEnd:
                    end = re.sub(regexp0, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
            else:
                # No date in filename pattern
                if ('frequency' in kwargs and ((kwargs['frequency'] == "fx") or
                                               kwargs['frequency'] == "seasonnal" or
                                               kwargs['frequency'] == "annual_cycle")):
                    if not remote_prefix:
                        # local data : file content can be checked for the variable
                        if (l.find("${variable}") >= 0) or variable == '*' or \
                           fileHasVar(f, variable) or \
                           (variable != altvar and fileHasVar(f, altvar)):
                            clogger.debug("adding fixed field :" + f)
                            add_file(f)
                    else:
                        # remote data : only the filename can be checked
                        if (l.split(":")[-1].find("${variable}") >= 0) or variable == '*' or \
                           (variable != altvar and (f.find(altvar) >= 0)):
                            clogger.debug("adding fixed field :" +
                                          remote_prefix + f)
                            add_file(remote_prefix + f)
                        else:
                            raise Climaf_Data_Error(
                                "For remote files, filename pattern (%s) should include ${varname} (which is instanciated by variable name or filenameVar)"
                                % f)
                else:
                    clogger.info(
                        "Cannot yet filter files re. time using only file content."
                    )
                    add_file(f)

            # Filter file time period against required period
            if (fperiod and period.intersects(fperiod)) or not regexp:
                clogger.debug(
                    'Period is OK - Considering variable filtering on %s and %s for %s'
                    % (variable, altvar, f))
                # Filter against variable
                if (l.find("${variable}") >= 0):
                    clogger.debug(
                        'appending %s based on variable in filename' % f)
                    add_file(remote_prefix + f)
                    continue
                if (f not in rep):
                    # local data
                    if not remote_prefix and \
                        (variable == '*' or "," in variable or fileHasVar(f, variable) or
                         (altvar != variable and fileHasVar(f, altvar))):
                        # Should check time period in the file if not regexp
                        clogger.debug(
                            'appending %s based on multi-var or var exists in file '
                            % f)
                        add_file(f)
                        continue
                    # remote data
                    elif remote_prefix:
                        if variable == '*' or "," in variable or \
                            (variable != altvar and (f.find(altvar) >= 0)):
                            # Should check time period in the file if not regexp
                            clogger.debug(
                                'appending %s based on multi-var or altvar ' %
                                (remote_prefix + f))
                            add_file(remote_prefix + f)
                            continue
                        else:
                            mess = "For remote files, filename pattern (%s) should include" % (
                                remote_prefix + f)
                            mess += " ${varname} (which is instanciated by variable name or filenameVar)"
                            raise Climaf_Data_Error(mess)
            else:
                if not fperiod:
                    clogger.debug('not appending %s because period is None ' %
                                  f)
                else:
                    if not period.intersects(fperiod):
                        clogger.debug(
                            'not appending %s because period doesn t intersect %s'
                            % (f, period))

        # Break on first url with any matching data
        if len(rep) > 0:
            clogger.debug('url %s does match for ' % l + repr(kwargs))
            break
    return rep
Beispiel #8
0
def selectGenericFiles(urls, **kwargs):
    """
    Return the list of files matching a ``generic`` file organization.

    The files returned are those which :

    - match the patterns in ``urls`` once these patterns are instantiated by
      the values in kwargs, and

    - contain the ``variable`` provided in kwargs, and

    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory

    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc'])
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and
      filenames do include the variable name, as this speed up the search

    - YYYY, YYYYMM, YYYYMMDD : use it for indicating the start date of
      the period covered by each file, if this is applicable in the
      file naming; use a second time for end date, if applicable
      (otherwise the assumption is that the whole year -resp. month or
      day- is included in the file)

    - wildcards '?' and '*' for matching respectively one and any number of characters

    Keyword arguments ``period`` (a string or a period object) and
    ``variable`` are mandatory. If ``filenameVar`` is provided and not
    empty, it replaces ``variable`` when instantiating the patterns, and
    urls which do not include ${variable} are skipped. Each selected file
    is listed only once.
    """
    rep = []

    def add_file(path):
        # Successive filters below may select the same file : list it once
        if path not in rep:
            rep.append(path)

    period = kwargs['period']
    if isinstance(period, str):
        period = init_period(period)
    variable = kwargs['variable']
    mustHaveVariable = False
    if "filenameVar" in kwargs and kwargs['filenameVar']:
        kwargs['variable'] = kwargs['filenameVar']
        mustHaveVariable = True
    # Date keywords, longest first : otherwise YYYY would be replaced
    # inside YYYYMM and YYYYMMDD
    date_keys = ["YYYYMMDD", "YYYYMM", "YYYY"]
    date_globs = dict(YYYY="????", YYYYMM="??????", YYYYMMDD="????????")
    date_regexps = dict(YYYY="([0-9]{4})", YYYYMM="([0-9]{6})",
                        YYYYMMDD="([0-9]{8})")
    for l in urls:
        # There is no use to look for files which path is not specific
        # to the required variable when we know it should
        if l.find("${variable}") < 0 and mustHaveVariable:
            continue
        #
        # Instantiate keywords in pattern with attributes values
        template = Template(l).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in date_keys:
            temp2 = temp2.replace(k, date_globs[k])
        clogger.debug("Globbing on : " + temp2)
        lfiles = glob.glob(temp2)
        #
        # Construct regexp for extracting dates from filename; it depends
        # only on the template, so it is built once per url. The first
        # occurrence of the date keyword is the start date, a second one
        # (if any) is the end date
        regexp = None
        hasEnd = False
        for key in date_keys:
            if template.find(key) >= 0:
                regexp = template.replace(key, date_regexps[key], 1)
                if regexp.find(key) >= 0:
                    hasEnd = True
                    regexp = regexp.replace(key, date_regexps[key], 1)
                break
        if regexp:
            # Turn globbing wildcards to their regexp counterparts
            regexp = regexp.replace("*", ".*").replace("?", r".")
        #
        # Analyze all filenames
        for f in lfiles:
            #
            # Analyze file time period
            fperiod = None
            if regexp:
                start = re.sub(regexp, r'\1', f)
                if hasEnd:
                    end = re.sub(regexp, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
            else:
                # No date in filename pattern
                if ('frequency' in kwargs and kwargs['frequency'] == "fx"):
                    if (l.find("${variable}") >= 0) or fileHasVar(f, variable):
                        clogger.debug("adding fixed field :" + f)
                        add_file(f)
                else:
                    clogger.warning("Cannot yet filter files re. time using only file content. TBD")
                    add_file(f)
            #
            # Filter file time period against required period
            if (fperiod and period.intersects(fperiod)) or not regexp:
                # Filter against variable
                if (l.find("${variable}") >= 0) or fileHasVar(f, variable):
                    # Should check time period in the file if not regexp
                    add_file(f)
    return rep
Beispiel #9
0
def selectGenericFiles(urls, **kwargs):
    """
    Return the list of files matching a ``generic`` file organization.

    The files returned are those which :

    - match the patterns in ``urls`` once these patterns are instantiated by
      the values in kwargs, and

    - contain the ``variable`` provided in kwargs, and

    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory

    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc'])
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and
      filenames do include the variable name, as this speed up the search

    - YYYY, YYYYMM, YYYYMMDD : use it for indicating the start date of
      the period covered by each file, if this is applicable in the
      file naming; use a second time for end date, if applicable
      (otherwise the assumption is that the whole year -resp. month or
      day- is included in the file)

    - wildcards '?' and '*' for matching respectively one and any number of characters

    Keyword arguments ``period`` (a string or a period object) and
    ``variable`` are mandatory. When an url matches no file and a non-empty
    ``filenameVar`` is provided, the url is tried again with ``filenameVar``
    standing for the variable. Each selected file is listed only once.

    Raises Climaf_Data_Error if a date cannot be extracted from a filename
    while the pattern includes a date keyword.
    """
    rep = []

    def add_file(path):
        # Successive filters below may select the same file : list it once
        if path not in rep:
            rep.append(path)

    period = kwargs['period']
    if isinstance(period, str):
        period = init_period(period)
    variable = kwargs['variable']
    altvar = kwargs.get('filenameVar', variable)
    # a dict and an ordered list of date globbing patterns; the longest
    # keyword comes first, otherwise YYYY would be replaced inside YYYYMM
    dt = dict(YYYY="????", YYYYMM="??????", YYYYMMDD="????????")
    lkeys = sorted(dt, reverse=True)
    # a dict and an ordered list for matching dates
    dr = dict(YYYY="([0-9]{4})", YYYYMM="([0-9]{6})", YYYYMMDD="([0-9]{8})")
    rkeys = sorted(dr, reverse=True)
    #
    for l in urls:
        # Instantiate keywords in pattern with attributes values
        template = Template(l).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in lkeys:
            temp2 = temp2.replace(k, dt[k])
        lfiles = glob.glob(temp2)
        clogger.debug("Globbing %d files for varname on %s : " % (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles) == 0 and "filenameVar" in kwargs and kwargs['filenameVar']:
            # Change value of facet 'variable' (this also holds for next urls)
            kwargs['variable'] = kwargs['filenameVar']
            template = Template(l).safe_substitute(**kwargs)
            temp2 = template
            for k in lkeys:
                temp2 = temp2.replace(k, dt[k])
            #
            lfiles = glob.glob(temp2)
            clogger.debug("Globbing %d files for filenamevar on %s: " % (len(lfiles), temp2))

        # Construct regexp for extracting dates from filename : the first
        # occurrence of the date keyword is the start date, a second one
        # (if any) is the end date
        regexp = None
        hasEnd = False
        for key in rkeys:
            if template.find(key) >= 0:
                regexp = template.replace(key, dr[key], 1)
                if regexp.find(key) >= 0:
                    hasEnd = True
                    regexp = regexp.replace(key, dr[key], 1)
                break
        #
        for f in lfiles:
            #
            # Analyze file time period
            fperiod = None
            if regexp:
                # Turn globbing wildcards to their regexp counterparts
                regexp0 = regexp.replace("*", ".*").replace("?", r".")
                start = re.sub(regexp0, r'\1', f)
                if start == f:
                    # re.sub returns its input unchanged when nothing matches
                    raise Climaf_Data_Error("Start period not found") #? LV
                if hasEnd:
                    end = re.sub(regexp0, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
            else:
                # No date in filename pattern
                if ('frequency' in kwargs and ((kwargs['frequency'] == "fx") or
                                               kwargs['frequency'] == "seasonnal" or
                                               kwargs['frequency'] == "annual_cycle")):
                    if (l.find("${variable}") >= 0) or fileHasVar(f, variable) or fileHasVar(f, altvar):
                        clogger.debug("adding fixed field :" + f)
                        add_file(f)
                else:
                    clogger.warning("Cannot yet filter files re. time using only file content. TBD")
                    add_file(f)
            #
            # Filter file time period against required period
            if (fperiod and period.intersects(fperiod)) or not regexp:
                clogger.debug('Period is OK - Considering variable filtering on %s and %s for %s' % (variable, altvar, f))
                # Filter against variable
                if (l.find("${variable}") >= 0):
                    clogger.debug('appending %s based on variable in filename' % f)
                    add_file(f)
                    continue
                if f not in rep and (fileHasVar(f, variable) or fileHasVar(f, altvar) or ("," in variable)):
                    # Should check time period in the file if not regexp
                    clogger.debug('appending %s based on multi-var or var exists in file ' % f)
                    add_file(f)
            else:
                if not fperiod:
                    clogger.debug('not appending %s because period is None ' % f)
                else:
                    if not period.intersects(fperiod):
                        clogger.debug('not appending %s because period doesn t intersect %s' % (f, period))

    return rep
Beispiel #10
0
def variability_AR5(model,
                    realization,
                    variable,
                    table,
                    data_versions,
                    season="ANN",
                    project="CMIP6",
                    operator=None,
                    operator_args={},
                    post_operator=None,
                    post_operator_args={},
                    shift=100,
                    nyears=20,
                    number=20,
                    variability=True,
                    compute=True,
                    house_keeping=False,
                    detrend=True,
                    deep=None):
    """
    Compute the variability according to AR5 Box 2.1 :
     - select data time series in piControl for the whole of the samples (from
       its begin+SHIFT, duration consistent with NUMBER samples of size NYEARS);
       the data variant and version, and the data period, are selected according to
       dictionnary DATA_VERSIONS,
     - transform this data using OPERATOR (and its OPERATOR_ARGS) that should produce
       one value per year (default being to compute annual or seasonal means)
     - detrend that data, if required (this is done by default)
     - build an ensemble representing the samples (NUMBER * NYEARS)
     - transform each member's result using POST_OPERATOR and POST_OPERATOR_ARGS (default
       is to compute a time average)
     - if arg VARIABILITY is false, returns that result (i.e. by defaut the time mean),
     - otherwise computes and returns the variability as the ensemble standard deviation
       multiplied by square root of 2

    Arg DATA_VERSIONS is indexed as
    data_versions["piControl"][variable][table][model][realization] and provides
    a triplet (grid, version, data_period), where data_period reads
    <begin>-<end> and both dates start with a 4-digit year. If REALIZATION is
    not available for the model, the first available one is used instead.

    If the data period is too short for fitting the samples after SHIFT years,
    the shift is relaxed and the samples are taken at the end of the data
    period; ValueError is raised if the data period is shorter than
    NUMBER * NYEARS years.

    The returned value is a CliMAF object (either a field or an ensemble, depending on VARIABILITY)

    Arg COMPUTE, if set to True, drives an immediate lauch of the computation of the CliMAF
    object, using cfile, which is provided with arg DEEP (according to the original
    documentation, DEEP=True re-computes all results from scratch, without using CliMAF
    cached values for intermediate results).

    Arg HOUSE_KEEPING, if set to True, allows to release CliMAF cache intermediate results, to keep
    cache use as low as possible

    Used e.g for variability of :
      - plain variables
      - walsh seasonnality index
      - number of  dry days per year
      - year mean daily precipitation for non-dry days
      - inter_annual variability for any variable, using :
           * post_operator=inter_annual_variability
           * post_operator_args={"factor" : 1.414}

    This version yet tested only on CMIP6 models
    """
    # NOTE: the dict defaults of operator_args and post_operator_args are
    # only unpacked, never modified, so sharing them across calls is harmless
    init_trend()
    from climaf.operators import ctrend, csubtrend

    available = data_versions["piControl"][variable][table][model]
    if realization not in available:
        # Fall back to the first realization available for that model
        realization = list(available)[0]
    grid, version, data_period = available[realization]

    duration = nyears * number
    true_begin = int(data_period.split('-')[0][0:4])
    end = int(data_period.split('-')[1][0:4])
    begin = true_begin + shift
    if begin + duration - 1 > end:
        # In CMIP6, some models have enough spinup before piControl start,
        # but a too short piControl length
        # We assume that this has been dealt with at the stage of data selection, and allow
        # to release the constraint on shift at the beginning of the data period
        alt_begin = end - duration + 1
        if alt_begin >= true_begin:
            begin = alt_begin
        else:
            message = "Duration for %s %s %s %s %s %s is too short : [%d - %d] even with no shift %d is shorter than %d years " % \
                (model, variable, table, realization, version, grid, true_begin, end, shift, duration)
            raise ValueError(message)
    #
    period = "%g-%g" % (begin, begin + duration - 1)
    base_dict = dict(project=project,
                     experiment="piControl",
                     model=model,
                     institute=institute_for_model(model),
                     period=period,
                     variable=variable,
                     table=table,
                     version=version,
                     grid=grid,
                     realization=realization)
    if project == "CMIP6":
        base_dict.update(mip="CMIP")

    # Basic dataset (e.g. precip)
    basic = ds(**base_dict)
    dat = basic

    # Implement the operation if required, otherwise seasonal or yearly average
    annual = season in ["ann", "ANN", "anm"]
    if operator is None:
        if annual:
            dat_op = ccdo(dat, operator="yearmean")
        else:
            dat_op = ccdo_fast(dat, operator="selseason,%s -seasmean" % season)
    else:
        if annual:
            dat_op = operator(dat, **operator_args)
        else:
            dat_season = ccdo_fast(dat, operator="selseason,%s" % season)
            dat_op = operator(dat_season, **operator_args)
    dat = dat_op

    # Detrend the data if required
    if detrend:
        a = ctrend(dat)
        # Do not want to have a zero-mean detrended serie
        ap = ccdo_fast(a, operator="mulc,0")
        detrended = csubtrend(dat, ap, a.b)
        dat = detrended

    # Build an ensemble which members are the slices
    econtrol = cens()
    slices = [
        "%d-%d" % (begin + n * nyears, begin + (n + 1) * nyears - 1)
        for n in range(0, number)
    ]
    for slice_period in slices:
        econtrol[slice_period] = ccdo_fast(dat,
                                           operator="seldate," +
                                           init_period(slice_period).iso())

    # On each slice, implement the required post operation, otherwise compute a plain average
    if post_operator is not None:
        cmeans = cens()
        for member in econtrol:
            cmeans[member] = post_operator(econtrol[member],
                                           **post_operator_args)
    else:
        cmeans = ccdo_fast(econtrol, operator="timmean")

    # Use plain truthiness here, as in the tests further down, so that any
    # truthy value of VARIABILITY does define variab
    if variability:
        # Compute variability over the slices ensemble
        variab1 = ccdo_ens(cmeans, operator='ensstd1')
        variab = ccdo_fast(variab1, operator="mulc,1.414")  # cf. AR5 Box 2.1

    #
    if compute:
        if variability:
            cfile(variab, deep=deep)
        else:
            cfile(cmeans, deep=deep)
    if house_keeping:  # Discard intermediate data
        cdrop(basic)
        cdrop(dat_op)
        if operator is not None and not annual:
            cdrop(dat_season)
        if detrend:
            cdrop(a)
            cdrop(a.b)
            cdrop(ap)
            cdrop(detrended)
        cdrop(dat)
        for slice_period in slices:
            cdrop(econtrol[slice_period])
    #
    if variability:
        if house_keeping:
            cdrop(cmeans)
            cdrop(variab1)
        return variab
    else:
        return cmeans