def preprocess_observations(datapath='./'):
    """Remap and zonally average the observational / reanalysis files.

    For every expected monthly-mean file in `datapath` this creates, unless
    the output already exists:

    * ``remap_<file>``            -- `cdo.remapdis` onto the 'r360x180' grid
    * ``zonal-mean_remap_<file>`` -- `cdo.zonmean` of the remapped file

    The expected files are the slp/u10m/uflx monthly means of six
    reanalyses plus the CCMP, HadSLP2r and 20CR-ensemble files listed below.

    Parameters
    ----------
    datapath : str
        Directory holding the input files; outputs are written next to them.

    Returns
    -------
    None

    Notes
    -----
    The function temporarily changes the process working directory to
    `datapath`.  The original directory is restored even if a CDO call
    fails.
    """
    reanalyses = ['R1', 'R2', '20CR', 'ERA-Int', 'CFSR', 'MERRA']
    variables = ['slp', 'u10m', 'uflx']
    files = [r + '_' + v + '.mon.mean.nc'
             for r in reanalyses for v in variables]

    # Add in CCMP and HadSLP2r files, and the 20CR ensemble files.
    files.extend(['CCMP_198701-201112.nc', 'HadSLP2r_slp.mon.mean.nc'])
    files.extend(['20CR_ens_slp.mon.mean.nc', '20CR_ens_u10m.mon.mean.nc'])

    # Remember where we started, then work inside the data directory.
    cwd = os.getcwd()
    os.chdir(datapath)
    try:
        for f in files:
            print(f)
            remapped = 'remap_' + f
            zonal = 'zonal-mean_' + remapped
            # Skip steps whose output is already on disk.
            if not os.path.isfile(remapped):
                cdo.remapdis('r360x180', input=f, output=remapped)
            if not os.path.isfile(zonal):
                cdo.zonmean(input=remapped, output=zonal)
    finally:
        # Always move back, so a failed CDO call does not leave the caller
        # stranded in the data directory.
        os.chdir(cwd)
def zonal_mean(name):
    """Return the path of the zonal-mean version of the file `name`.

    The target path is ``'netcdf/zonmean_' + split(name)``.  If
    `already_calculated` returns something other than None for that path,
    that value is returned as-is; otherwise the zonal mean is computed with
    `cdo.zonmean` and the target path is returned.
    """
    target = 'netcdf/zonmean_' + split(name)
    cached = already_calculated(target)
    if cached is None:
        # Nothing reusable yet: compute the zonal mean now.
        cdo.zonmean(input=name, output=target)
        return target
    return cached
# Index box (lon1, lon2, lat1, lat2) passed to cdo.selindexbox before every
# zonal mean; the code assumes a T42(63) 64x128 grid.
_GETNCVAR_T42_INDEXBOX = (1, 128, 1, 64)


def _getncvar_cdo_steps(timesel, levsel, monsel, zonal):
    """Return the ordered CDO operators (first applied first) for a request.

    Each entry is ``(operator_name, positional_args)``.  An empty list means
    no CDO processing is needed and the variable can be read directly.
    """
    steps = []
    if monsel is not None:
        steps.append(('selmon', (monsel,)))
    if zonal:
        box_and_mean = [('selindexbox', _GETNCVAR_T42_INDEXBOX),
                        ('zonmean', ())]
        if timesel is not None:
            # With a date range: level -> index box -> zonal mean -> dates.
            if levsel is not None:
                steps.append(('sellevel', (levsel,)))
            steps.extend(box_and_mean)
            steps.append(('seldate', (timesel,)))
        else:
            # Without a date range the level is selected after the mean.
            steps.extend(box_and_mean)
            if levsel is not None:
                steps.append(('sellevel', (levsel,)))
    else:
        if levsel is not None:
            steps.append(('sellevel', (levsel,)))
        if timesel is not None:
            steps.append(('seldate', (timesel,)))
    return steps


def _getncvar_run_cdo(filename, field, steps):
    """Run `steps` on `filename` with CDO and return `field`, squeezed."""
    source = filename
    try:
        # Every operator but the last feeds its result to the next one as
        # `input`; only the last one is asked for the data array.
        for opname, opargs in steps[:-1]:
            source = getattr(cdo, opname)(*opargs, input=source)
        opname, opargs = steps[-1]
        return np.squeeze(
            getattr(cdo, opname)(*opargs, input=source, returnMaArray=field))
    finally:
        # Remove the CDO temp files, also when an operator failed.
        # NOTE(review): this glob is machine-wide and would also hit temp
        # files of other CDO sessions running at the same time -- confirm.
        os.system('rm -rf /tmp/cdoPy*')


def getNCvar(filename,field,timesel=None,levsel=None,monsel=None,seas=None,calc=None,remlon=1,sqz=True,att=False):
    """Get a variable from a netCDF file, optionally subset and averaged.

    Time is assumed to be the 1st dimension, lon is assumed to be the last.
    If any calculations are requested, the caller needs to make sure they
    can be performed on the data (some of the helper functions only handle
    a certain number of dimensions).

    Parameters
    ----------
    filename : full path to file
    field    : netCDF variable to read in
    timesel  : comma-delimited date range 'YYYY-MM-DD,YYYY-MM-DD' to select
               (a la CDO seldate)
    levsel   : level to select, in Pa (e.g. 50000 for 500hPa)
    monsel   : month to select from the timeseries
    seas     : time averaging: 'climo' for a climatology, an int month, or
               a season name {ANN|DJF|JJA|NDJ|MAM|SON}.  Cannot be combined
               with monsel.
    calc     : 'zm' for a zonal mean (assumes a T42(63) 64x128 grid).  Any
               other value is ignored (with a message) when timesel is
               given and rejected when it is not.
    remlon   : currently unused; kept for backward compatibility.
    sqz      : squeeze the data when reading the whole variable directly.
               Default True.  CDO-based reads are always squeezed.
    att      : currently unused; kept for backward compatibility.

    Returns
    -------
    fld : the data with the variable's scale_factor / add_offset attributes
          applied, or None if both seas and monsel were given.

    Raises
    ------
    ValueError : if calc is given, is not 'zm', and timesel is None.  This
                 combination was never supported and previously failed with
                 a NameError.

    Notes
    -----
    The disabled legacy (Darwin / getNCvar_old) code path was hard-wired
    off with ``if False:`` and has been dropped here.
    """
    zonal = calc == 'zm'
    if calc is not None and not zonal and timesel is None:
        raise ValueError(
            "timesel and calc combo doesn't make sense: calc=" + repr(calc)
            + " with timesel=None (only calc='zm' is implemented)")

    ncfile = openNC(filename)
    try:
        ncvar = ncfile.variables[field]

        #### READ VARIABLE FROM NC FILE ########
        steps = _getncvar_cdo_steps(timesel, levsel, monsel, zonal)
        if zonal:
            print('assuming T42(63) 64x128 resolution for zonal mean')
        if steps:
            fld = _getncvar_run_cdo(filename, field, steps)
        elif sqz:
            # get everything; for most situations squeezing is what we want
            fld = np.squeeze(ncvar[...])
        else:
            fld = ncvar[...]
        if calc is not None and not zonal:
            print("only calc='zm' is implemented now. Returning only selected date range/level/month.")

        ####### TIME AVERAGE the VARIABLE ##########
        # fld has to be 3d by the time it is passed to the cutl functions:
        # (time,lev,lat) or (time,lat,lon)
        if seas is not None:
            if monsel is not None:
                print("Can't do seasonal average when monsel != None")
                return None
            if seas == 'climo':
                fld, _stddev = cutl.climatologize(fld)
            elif isinstance(seas, int):
                # an int is a month number
                fld = cutl.seasonalize(fld, mo=seas)
            else:
                fld = cutl.seasonalize(fld, season=seas)

        # Apply any scaling and offsetting stored on the variable.
        # NOTE(review): the netCDF reader and CDO may already apply
        # scale_factor/add_offset themselves -- confirm this does not scale
        # the data twice.
        var_offset = getattr(ncvar, 'add_offset', 0)
        if hasattr(ncvar, 'scale_factor'):
            var_scale = ncvar.scale_factor
            print('var_scale ' + str(var_scale))
        else:
            var_scale = 1

        return fld * var_scale + var_offset
    finally:
        # Close the file on every exit path, including the early return
        # above and any exception.
        ncfile.close()
# OR: select out specific dates, time-mean then zonal mean # xm_fgco2 = cdo.zonmean( input = cdo.timmean( input = cdo.seldate('1990-01-01,2005-12-31', input=ifile ) ) ,returnMaArray ='fgco2') # this works: fldcselcdo = cdo.seldate('0002-01-01,0061-12-31', input = fnamec, returnArray = ncfield ) #fldctmcdo = np.squeeze(cdo.timmean(input = cdo.seldate('0002-01-01,0061-12-31', input = fnamec), returnArray = ncfield )) # @@@@ move these to functions so can call os.system() ## fldczmcdo = np.squeeze( ## cdo.zonmean( input = ## cdo.timmean(input = ## cdo.seldate('0002-01-01,0061-12-31', input = fnamec ) ), ## returnMaArray = ncfield)) fldczmcdo = np.squeeze( cdo.zonmean( input = cdo.timmean(input = fnamec),returnMaArray = ncfield)) os.system('rm -rf /tmp/cdoPy*') ## fldpzmcdo = np.squeeze( ## cdo.zonmean( input = ## cdo.timmean(input = ## cdo.seldate('0002-01-01,0061-12-31', input = fnamep2 ) ), ## returnMaArray = ncfield)) lats,levs = np.meshgrid(lat,lev) plotfld = fldczmtm - fldczmcdo
def _zonal_mean(self, name):
    """Return the path of the zonal-mean file for `name`.

    The target is ``'netcdf/zonmean_' + self._split(name)``.  It is created
    with `cdo.zonmean` only if no file exists at that path yet.
    """
    target = 'netcdf/zonmean_' + self._split(name)
    if os.path.isfile(target):
        # Already computed on an earlier call; reuse it.
        return target
    cdo.zonmean(input=name, output=target)
    return target