def preprocess_observations(datapath='./'):
    """Remap and zonally average the observational/reanalysis monthly means.

    For each expected input file ``<f>`` inside *datapath* this writes
    ``remap_<f>`` (``cdo.remapdis`` onto the ``r360x180`` grid) and then
    ``zonal-mean_remap_<f>`` (``cdo.zonmean`` of the remapped file).  An
    output that already exists on disk is not recomputed.

    The process working directory is switched to *datapath* while the files
    are processed and is always restored afterwards, including when a cdo
    call raises.

    datapath: directory holding the input netCDF files (default: cwd)
    """
    reanalyses = ['R1', 'R2', '20CR', 'ERA-Int', 'CFSR', 'MERRA']
    variables = ['slp', 'u10m', 'uflx']
    # Reanalysis monthly mean files, one per (reanalysis, variable) pair.
    files = [r + '_' + v + '.mon.mean.nc'
             for r in reanalyses for v in variables]
    # Add in CCMP and HadSLP2r files, plus the 20CR ensemble files.
    files.extend(['CCMP_198701-201112.nc', 'HadSLP2r_slp.mon.mean.nc'])
    files.extend(['20CR_ens_slp.mon.mean.nc', '20CR_ens_u10m.mon.mean.nc'])

    # where we are starting from
    cwd = os.getcwd()
    # move to where the data is
    os.chdir(datapath)
    try:
        for f in files:
            print(f)
            remapped = 'remap_' + f
            zonal = 'zonal-mean_' + remapped
            if not os.path.isfile(remapped):
                cdo.remapdis('r360x180', input=f, output=remapped)
            if not os.path.isfile(zonal):
                cdo.zonmean(input=remapped, output=zonal)
    finally:
        # move back, even if one of the cdo calls failed
        os.chdir(cwd)
Example #2
0
def zonal_mean(name):
    """Return the path of the zonal-mean file for *name*, creating it if needed.

    The target path is 'netcdf/zonmean_' + split(name).  When
    already_calculated() reports an existing result, that value is returned
    as-is; otherwise cdo.zonmean writes the target and its path is returned.
    """
    target = 'netcdf/zonmean_' + split(name)
    cached = already_calculated(target)
    if cached is None:
        cdo.zonmean(input=name, output=target)
        return target
    return cached
Example #3
0
def zonal_mean(name):
    """Return the path of the zonal-mean file for *name*, creating it if needed.

    The target path is 'netcdf/zonmean_' + split(name).  When
    already_calculated() reports an existing result, that value is returned
    as-is; otherwise cdo.zonmean writes the target and its path is returned.
    """
    target = 'netcdf/zonmean_' + split(name)
    cached = already_calculated(target)
    if cached is None:
        cdo.zonmean(input=name, output=target)
        return target
    return cached
Example #4
0
def _cdo_steps(timesel, levsel, monsel, zonal):
    """Build the ordered CDO operator chain for one getNCvar() request.

    Returns a list of (operator_name, positional_args) tuples; the first
    entry is applied to the input file, each later entry to the previous
    result.  An empty list means no CDO processing is required.
    """
    month = [('selmon', (monsel,))] if monsel is not None else []
    level = [('sellevel', (levsel,))] if levsel is not None else []
    dates = [('seldate', (timesel,))] if timesel is not None else []
    # The index box keeps lon indices 1-128 / lat indices 1-64 before the
    # zonal mean (T42 grid assumed); presumably this is what drops the extra
    # wrap-around longitude -- verify against the input files.
    zonmean = ([('selindexbox', (1, 128, 1, 64)), ('zonmean', ())]
               if zonal else [])

    if zonal and timesel is None:
        # Without a date range the level is selected after the zonal mean.
        return month + zonmean + level
    return month + level + zonmean + dates


def _run_cdo_chain(filename, field, steps):
    """Apply *steps* to *filename* with CDO and return *field*, squeezed."""
    source = filename
    for opname, args in steps[:-1]:
        # Each intermediate operator returns a temp file used as next input.
        source = getattr(cdo, opname)(*args, input=source)
    opname, args = steps[-1]
    fld = np.squeeze(
        getattr(cdo, opname)(*args, input=source, returnMaArray=field))
    # Clean up the temp files left behind by the CDO python bindings.
    # NOTE(review): this glob removes every /tmp/cdoPy* entry, including
    # those of other processes running concurrently -- confirm acceptable.
    os.system('rm -rf /tmp/cdoPy*')
    return fld


def getNCvar(filename,field,timesel=None,levsel=None,monsel=None,seas=None,calc=None,remlon=1,sqz=True,att=False):
    """ gets a variable from netcdf file.
        Time is assumed to be the 1st dimension, Lon is assumed to be the last.
        If any calculations are requested to be performed on the data, the user
        needs to make sure that the requested operations can be performed (b/c
        some of the other functions only handle certain # of dims, etc.)

        filename: full path to file
        field: NC variable to read in
        timesel: comma-delim string of date range in 'YYYY-MM-DD' fmt to select (a la CDO)
        levsel: select level in Pa (e.g. 50000 for 500hPa)
        monsel: select month from timeseries
        seas: seasonally (annually) average or return climatology {climo|ANN|DJF|JJA|NDJ|MAM|SON},
              or an int month number
        calc: 'zm' (zonal mean) is the only calculation implemented. It assumes
              T42(63) 64x128 resolution. Any other value combined with timesel
              only selects the date range/level/month (a notice is printed).
        remlon: currently unused (the wrap-around longitude is dropped by the
                fixed index box applied before the zonal mean).
        sqz:  squeeze the data if 'getting all data'. Default True.
                   Set False when singular dims must be kept
                   (e.g. MOC variable in CCSM4). Data read through CDO is
                   always squeezed.
        att: currently unused.

        returns fld, or None (after printing a message) when both seas and
        monsel are given, since a seasonal average of a single month is not
        supported.

        raises ValueError when calc is something other than 'zm' and no
        timesel is given; that combination has no implementation.
        """
    # The legacy macOS fallback (getNCvar_old) had been disabled with
    # `if False:` and was unreachable, so it is no longer carried here.

    # Reject unsupported requests before touching the file or running CDO.
    if seas is not None and monsel is not None:
        print("Can't do seasonal average when monsel != None")
        return None

    zonal = calc == 'zm'
    if timesel is None and calc is not None and not zonal:
        raise ValueError("timesel and calc combo doesn't make sense: calc=%r "
                         "without timesel (only calc='zm' is implemented)"
                         % (calc,))

    ncfile = openNC(filename)
    try:
        ncvar = ncfile.variables[field]

        #### READ VARIABLE FROM NC FILE ########
        if zonal:
            print('assuming T42(63) 64x128 resolution for zonal mean')

        steps = _cdo_steps(timesel, levsel, monsel, zonal)
        if steps:
            fld = _run_cdo_chain(filename, field, steps)
        elif sqz:
            # get everything; for most situations squeezing is what we want
            fld = np.squeeze(ncvar[...])
        else:
            fld = ncvar[...]

        if timesel is not None and calc is not None and not zonal:
            print("only calc='zm' is implemented now. Returning only selected date range/level/month.")

        ####### TIME AVERAGE the VARIABLE ##########
        # fld has to be 3d by the time it is passed to func
        #  (time,lev,lat) or (time,lat,lon)
        if seas is not None:
            if seas == 'climo':
                fld, _stddev = cutl.climatologize(fld)
            elif isinstance(seas, int):
                # seas is an int value for a month
                fld = cutl.seasonalize(fld, mo=seas)
            else:
                fld = cutl.seasonalize(fld, season=seas)

        # Apply any scaling and offsetting needed.
        # NOTE(review): if the reader behind openNC()/CDO already applies
        # scale_factor/add_offset on read, this scales twice -- confirm.
        try:
            var_offset = ncvar.add_offset
        except AttributeError:
            var_offset = 0

        try:
            var_scale = ncvar.scale_factor
        except AttributeError:
            var_scale = 1
        else:
            print('var_scale ' + str(var_scale))

        return fld * var_scale + var_offset
    finally:
        # Always release the file handle, including on errors.
        ncfile.close()
Example #5
0
# Alternative: select specific dates, take the time mean, then the zonal mean:
#   xm_fgco2 = cdo.zonmean(input=cdo.timmean(
#       input=cdo.seldate('1990-01-01,2005-12-31', input=ifile)),
#       returnMaArray='fgco2')
#
# Known to work:
#   fldcselcdo = cdo.seldate('0002-01-01,0061-12-31', input=fnamec,
#                            returnArray=ncfield)
#   fldctmcdo = np.squeeze(cdo.timmean(
#       input=cdo.seldate('0002-01-01,0061-12-31', input=fnamec),
#       returnArray=ncfield))
#
# @@@@ move these to functions so can call os.system()
# Date-restricted variant for the control file (same pattern applies to
# fnamep2 -> fldpzmcdo):
#   fldczmcdo = np.squeeze(cdo.zonmean(input=cdo.timmean(
#       input=cdo.seldate('0002-01-01,0061-12-31', input=fnamec)),
#       returnMaArray=ncfield))

# Time mean over the whole control file, then zonal mean, read back as a
# masked array with singleton dimensions squeezed out.
fldczmcdo = np.squeeze(
    cdo.zonmean(input=cdo.timmean(input=fnamec), returnMaArray=ncfield)
)
# Remove the temp files left behind by the CDO python bindings.
os.system('rm -rf /tmp/cdoPy*')

# 2-D coordinate grids built from lat and lev.
lats, levs = np.meshgrid(lat, lev)

# Difference between the previously computed zonal/time mean (fldczmtm) and
# the CDO result.
plotfld = fldczmtm - fldczmcdo
Example #6
0
 def _zonal_mean(self, name):
     """Return the zonal-mean file path for *name*, computing it if absent.

     The target is 'netcdf/zonmean_' + self._split(name); cdo.zonmean is
     only run when no file exists at that path yet.
     """
     target = 'netcdf/zonmean_' + self._split(name)
     if os.path.isfile(target):
         return target
     cdo.zonmean(input=name, output=target)
     return target