# Script fragment: read the u and v wind fields for one simulation and season,
# and form the time-mean difference of u between the two files of the pair.
# NOTE(review): the original indentation was lost. The block structure below is
# reconstructed from syntax alone -- in particular, where the `else:` branch
# ends is an assumption; confirm against the original file.
sim = 'ENSE'
sea = 'SON' #1 #'DJF'

# `threed` and `level` are not defined in this fragment; they must be set
# earlier in the script.
if threed:
    uncfield='U'
    vncfield='V'
    ufield='u' + str(level)
    vfield='v' + str(level)
    # NOTE(review): under Python 2, `/` on an int `level` is integer division.
    level=str(level/100) # for printing figs
else:
    # NOTE(review): ufield/vfield/uncfield/vncfield are not assigned on this
    # path, so they must already exist when threed is false -- confirm.
    level=''

# build_filepathpair returns two file paths (presumably control, perturbed).
fuc,fup=con.build_filepathpair(sim,ufield)
lat=cnc.getNCvar(fuc,'lat')
lon=cnc.getNCvar(fuc,'lon')

uc=cnc.getNCvar(fuc,uncfield,timesel='0002-01-01,0121-12-31',seas=sea)
up=cnc.getNCvar(fup,uncfield,timesel='0002-01-01,0121-12-31',seas=sea)
ucm=np.mean(uc,axis=0) # time mean
upm=np.mean(up,axis=0)
ud=upm-ucm # difference of the two means (second file minus first)

fvc,fvp=con.build_filepathpair(sim,vfield)
vc=cnc.getNCvar(fvc,vncfield,timesel='0002-01-01,0121-12-31',seas=sea)
vp=cnc.getNCvar(fvp,vncfield,timesel='0002-01-01,0121-12-31',seas=sea)
type = "nh" # plot type latlims = [20, 90] lonlims = [0, 180] # lonlims=[0,360] limsdict = {"latlims": latlims, "lonlims": lonlims} if docorr: # show EOF as correlation. otherwise, covariance cmin = -1 cmax = 1 else: cmin = "" cmax = "" fnamec, fnamep = con.build_filepathpair(sim, field) fldc = cnc.getNCvar(fnamec, ncfield, seas=sea) fldp = cnc.getNCvar(fnamep, ncfield, seas=sea) lon = cnc.getNCvar(fnamec, "lon") lat = cnc.getNCvar(fnamec, "lat") fldca = np.squeeze(fldc - np.mean(fldc, axis=0)) # remove time mean fldpa = np.squeeze(fldp - np.mean(fldp, axis=0)) if subset: msk1 = con.get_t63regionmask("other", limsdict=limsdict) fldca, msk = cutl.mask_region(fldca, lat, lon, "other", limsdict=limsdict) fldpa, msk = cutl.mask_region(fldpa, lat, lon, "other", limsdict=limsdict)
def pattcorr_ensemble(ename, field, latlim=60):
    """ pattcorr_ensemble(ename, field, latlim=60)

           For each member of ensemble `ename`, compute the difference between
           the two files of its file pair for `field`, then pattern-correlate
           the member's monthly climatology and seasonal means (grid rows with
           lat>latlim only, area weighted) against a baseline difference field.

           ename:  ensemble name handed to con.build_ensemblesims();
                   'ANT' is mapped to 'HistIC' and 'TOT' to 'HistBC'
           field:  field name used to build the file paths
           latlim: only latitudes strictly greater than this are used

           NOTE(review): unfinished (see the author's 12/2/14 comment below).
             - ncfield, timesel, conv, seasons, flddc and flddcsea are read but
               never assigned in this function; unless they exist at module
               scope the call raises NameError.
             - there is no return statement, so the result dicts are discarded
               and the function returns None.
             - pattcorr_withinensemble maps 'ANT'/'TOT' to 'histIC'/'histBC'
               (lower-case h); confirm which spelling
               con.build_ensemblesims() expects.
    """
    # @@@@@@@@@@@@ is this fully implemented? Don't think so. 12/2/14

    if ename=='ANT':
        ename='HistIC'
    elif ename=='TOT':
        ename='HistBC'

    enssims = con.build_ensemblesims(ename)
    ensnum=len(enssims) # not used below

    # ======= copied from ===========

    # per-member results, keyed by simulation name
    diffdict = {}
    pcmeandict = {} # fldp-fldc pattern corr compared to mean BC
    pchaddict = {} # fldp-fldc pattern corr compared to hadisst (never filled below)
    seadiffdict = {} # seasonal mean
    pcseameandict = {}
    pcsea2meandict = {} # to test the other pattern corr calc
    pcsea2pvalmeandict = {} # to test the other pattern corr calc

    # generate weights for the pattern corr
    lat = con.get_t63lat()
    lon = con.get_t63lon()

    areas = cutl.calc_cellareas(lat,lon)
    areas = areas[lat>latlim,:]
    # normalize so the weights of the retained cells sum to 1
    weights = areas / np.sum(np.sum(areas,axis=1),axis=0)

    for skey in enssims:

        fnamec,fnamep = con.build_filepathpair(skey,field)

        # monthly calc
        # NOTE(review): ncfield, timesel and conv are not defined in this function
        fldc = cnc.getNCvar(fnamec,ncfield,timesel=timesel)*conv
        fldp = cnc.getNCvar(fnamep,ncfield,timesel=timesel)*conv
        fldd = fldp-fldc

        # take the pattern correlation
        flddclimo,flddstd = cutl.climatologize(fldd) # climo first (don't need to do for BCs technically)
        # NOTE(review): flddc (baseline diff data) is not defined in this function
        flddcclimo,flddcstd = cutl.climatologize(flddc) # climo first. baseline diff data

        diffdict[skey] = flddclimo

        # for each month, compute pattern corr
        pc = np.zeros((12))
        for mii,mon in enumerate(con.get_mon()):
            tmp = np.squeeze(flddclimo[mii,lat>latlim,...])
            tmpcmp = np.squeeze(flddcclimo[mii,lat>latlim,...])
            pc[mii] = cutl.pattcorr(tmp.flatten()*weights.flatten(),tmpcmp.flatten()*weights.flatten())

        pcmeandict[skey] = pc # monthly

        # seasonal calc
        fldcsea = np.zeros((4,len(lat),len(lon)))
        fldpsea = np.zeros((4,len(lat),len(lon)))
        flddsea = np.zeros((4,len(lat),len(lon)))
        pcsea = np.zeros((4))
        pcsea2 = np.zeros((4)) # test pattcorr_pearson() @@
        pcsea2pval = np.zeros((4)) # test pattcorr_pearson()

        # NOTE(review): seasons is not defined in this function
        for seaii,sea in enumerate(seasons):
            # mean over axis 0 of each seasonal series, then difference
            fldcsea[seaii,...] = np.mean(cnc.getNCvar(fnamec,ncfield,timesel=timesel,seas=sea)*conv,axis=0)
            fldpsea[seaii,...] = np.mean(cnc.getNCvar(fnamep,ncfield,timesel=timesel,seas=sea)*conv,axis=0)
            flddsea[seaii,...] = fldpsea[seaii,...]-fldcsea[seaii,...]

            tmp = np.squeeze(flddsea[seaii,lat>latlim,...])
            # NOTE(review): flddcsea (seasonal baseline) is not defined in this function
            tmpcmp = np.squeeze(flddcsea[seaii,lat>latlim,...])
            pcsea[seaii] = cutl.pattcorr(tmp.flatten()*weights.flatten(), tmpcmp.flatten()*weights.flatten())
            pcsea2[seaii],pcsea2pval[seaii] = cutl.pattcorr_pearson(tmp.flatten()*weights.flatten(), tmpcmp.flatten()*weights.flatten())

        seadiffdict[skey] = flddsea
        pcseameandict[skey] = pcsea
        pcsea2meandict[skey] = pcsea2
        pcsea2pvalmeandict[skey] = pcsea2pval
def _pattcorr_table(flddict, simkeys, nper, latmask, wflat):
    """ Pattern-correlate every member in simkeys with every member (itself included).

           flddict: dict simname -> array whose first axis has at least nper
                    entries (months or seasons) and whose second axis is lat
           simkeys: iterable of simulation names (keys of flddict)
           nper:    number of periods along the first axis to correlate
           latmask: boolean mask over latitude selecting the rows to use
           wflat:   flattened area weights for the selected cells

           returns: DataFrame built from {sim1: {sim2: array of nper corrs}}
    """
    outer = {}
    for skey1 in simkeys:
        outfld = flddict[skey1]
        inner = {}
        for skey2 in simkeys:
            infld = flddict[skey2]
            pc = np.zeros(nper)
            for pii in range(nper):
                # fresh weighted vectors for every call, as in the original code
                tmp = np.squeeze(infld[pii, latmask, ...]).flatten() * wflat
                tmpcmp = np.squeeze(outfld[pii, latmask, ...]).flatten() * wflat
                pc[pii] = cutl.pattcorr(tmp, tmpcmp)
            inner[skey2] = pc
        outer[skey1] = inner

    return pd.DataFrame(outer)


def pattcorr_withinensemble(ename,fdict,latlim=60,timesel='0002-01-01,0121-12-31'):
    """ pattcorr_withinensemble(ename,fdict,latlim=60,timesel='0002-01-01,0121-12-31')

           Pattern-correlate each member of an ensemble with each other member
           (and with itself), using the difference between the two files of
           each member's file pair, area weighted, for lat>latlim only.

           ename:   ensemble name handed to con.build_ensemblesims();
                    'ANT' is mapped to 'histIC' and 'TOT' to 'histBC'
           fdict:   dict with keys 'field' (name used to build the file paths),
                    'ncfield' (variable name inside the file) and 'conv'
                    (factor the data are multiplied by)
           latlim:  only latitudes strictly greater than this are used
           timesel: time selection string passed to cnc.getNCvar()

           returns: pctable, pctablesea (DataFrames). Both are built from a
                    dict of dicts keyed by simulation name; each entry is the
                    array of pattern correlations per month (pctable) or per
                    season in the order SON, DJF, MAM, JJA (pctablesea).
    """
    field = fdict['field']
    ncfield = fdict['ncfield']
    conv = fdict['conv']

    seasons = ('SON','DJF','MAM','JJA')

    if ename == 'ANT':
        ename = 'histIC'
    elif ename == 'TOT':
        ename = 'histBC'

    enssims = con.build_ensemblesims(ename)

    # single-argument print(...) behaves the same as the print statement
    print('ENSSIMS: ') # @@@
    print(enssims) # @@

    # generate weights for the pattern corr
    lat = con.get_t63lat()
    lon = con.get_t63lon()
    latmask = lat > latlim   # computed once; reused for every member and period

    areas = cutl.calc_cellareas(lat, lon)
    areas = areas[latmask, :]
    # normalize so the weights of the retained cells sum to 1
    weights = areas / np.sum(np.sum(areas, axis=1), axis=0)
    wflat = weights.flatten()

    # ========= create diffdict first =====
    diffdict = {}
    seadiffdict = {}
    for skey in enssims:

        fnamec,fnamep = con.build_filepathpair(skey, field)

        # Monthly: climatology of the difference
        fldc = cnc.getNCvar(fnamec, ncfield, timesel=timesel)*conv
        fldp = cnc.getNCvar(fnamep, ncfield, timesel=timesel)*conv
        flddclimo,flddstd = cutl.climatologize(fldp - fldc)

        diffdict[skey] = flddclimo
        print(skey + ' ' + str(flddclimo.shape)) # @@@

        # Seasonal: difference of the means over axis 0 of each seasonal series
        flddsea = np.zeros((len(seasons), len(lat), len(lon)))
        for seaii,sea in enumerate(seasons):
            fldcsea = np.mean(cnc.getNCvar(fnamec, ncfield, timesel=timesel, seas=sea)*conv, axis=0)
            fldpsea = np.mean(cnc.getNCvar(fnamep, ncfield, timesel=timesel, seas=sea)*conv, axis=0)
            flddsea[seaii,...] = fldpsea - fldcsea

        seadiffdict[skey] = flddsea

    # ======= Now do pattern corrs within ensemble ====
    pctable = _pattcorr_table(diffdict, enssims, len(con.get_mon()), latmask, wflat)
    pctablesea = _pattcorr_table(seadiffdict, enssims, len(seasons), latmask, wflat)

    return pctable, pctablesea
def _loaddata_components(sim, field, ncfield, fieldpair):
    """ Describe the files that have to be combined to build `field` for `sim`.

           'turb' is HFL + HFS and 'net' is HFL + HFS - FLG; any other field is
           read directly from its own file pair (fieldpair).

           returns: list of (fnamec, fnamep, ncvar, sign) tuples, in the order
                    they are to be combined. The first sign is always +1.
    """
    if field not in ('turb', 'net'):
        return [(fieldpair[0], fieldpair[1], ncfield, 1)]

    specs = [('hfl', 'HFL', 1), ('hfs', 'HFS', 1)]
    if field == 'net':
        specs.append(('flg', 'FLG', -1))

    components = []
    for filefield, ncvar, sign in specs:
        fnamec, fnamep = con.build_filepathpair(sim, filefield)
        components.append((fnamec, fnamep, ncvar, sign))
    return components


def _loaddata_read(components, member, timesel, ncparams):
    """ Read and combine all components for one member ('ctl' or 'pert'). """
    total = None
    for fnamec, fnamep, ncvar, sign in components:
        fname = fnamec if member == 'ctl' else fnamep
        data = cnc.getNCvar(fname, ncvar, timesel=timesel, **ncparams)
        if total is None:
            total = data
        elif sign > 0:
            total = total + data
        else:
            total = total - data
    return total


def loaddata(fields, simulations, ncfields=None,model='CanAM4',timeper='001-121',timefreq=None,
             levsel=None,meantype=None,filetype='diff',region=None,alsomask=None,rettype='dict'):
    """ loaddata(fields, simulations,ncfields=None,model='CanAM4',timeper='001-121',timefreq=None,
                 levsel=None, meantype=None,filetype='diff',region=None,alsomask=None,rettype='dict')

            fields: tuple of field names. Only the first entry is used, but a
                    tuple is still required.
            simulations: tuple of simulation names (diff names='E1'...'ENS','NSIDC' etc)
            ncfields: tuple of ncfield names (var name in file itself).
                      Default to upper case of field
            model: for now only 'CanAM4' is implemented
            timeper: time period of the data. default '001-121'. Currently unused.
            timefreq: time frequency TO RETURN. default (None) all data
                      'monthly'|'seasonal'|'climo'|'ANN'|'DJF'|'JJA'|'NDJ'|'MAM'|'SON'|
                      'ND'|'JF'|'SO'|1,2,3,4,5,6,7,8,9,10,11,12
                      'monthly' and 'seasonal' do not sub-select: like None,
                      they read all data. A single month is passed to
                      cnc.getNCvar() as monsel, a season/climo name as seas.
            levsel: select level in Pa (e.g. 50000 for 500hPa). default all levels
            meantype: 'time','zonal'. default None. 'zonal' passes calc='zm' to
                      cnc.getNCvar(); 'time' averages over the first dimension
                      (it is assumed that time is the first dimension).
            filetype: 'diff','ctl','pert','pval'
                      Default is diff where both ctl and pert are read in and differenced.
                      'pval' returns p-values from cutl.ttest_ind(pert,ctl) of
                      the regional means, and only when region is given;
                      without a region it returns the same as 'diff'.
            region: any of the regions in constants -> region dict. default None.
            alsomask: passed through to cutl.calc_regmean(). Only used if
                      region is given. Default None.
            rettype: 'dict' (default) or 'ndarray'

            returns: dict SIMULATION->data, or for rettype='ndarray' an array
                     with the simulations stacked along a new first axis, in
                     the order given. Returns -1 (after printing a message) if
                     model, timefreq or filetype is not supported.

            Reads data for the fixed time selection '0002-01-01,0121-12-31'.
            The special fields 'turb' (HFL+HFS) and 'net' (HFL+HFS-FLG) are
            assembled from their component files.
    """
    if model != 'CanAM4':
        print('model not supported!')
        return -1

    print('@@ probably should invert the order such that it is field, season, sim?')

    timesel = '0002-01-01,0121-12-31'

    seabool = False # set to True if requested time freq is one season or climo
    monbool = False # set to True if requested time freq is one month

    # tf is always bound here; previously timefreq=None (the documented
    # default) left it undefined and the function raised UnboundLocalError.
    if timefreq is None:
        tf = None
    elif timefreq == 'monthly':
        # get all months
        tf = con.get_mon()
    elif timefreq == 'seasonal':
        # get all 4 seasons
        tf = 'DJF','MAM','JJA','SON'
    elif timefreq in range(1,13):
        # choose an individual month
        tf = timefreq
        monbool = True
    elif timefreq in ('climo','ANN','DJF','JJA','NDJ','MAM','SON','ND','JF','SO'):
        tf = timefreq
        seabool = True
    else:
        print('timefreq not supported!')
        return -1

    print(tf) # @@

    # reject a bad filetype before any file is touched
    if filetype not in ('diff', 'ctl', 'pert', 'pval'):
        print("filetype not supported! ['diff'|'ctl'|'pert'|'pval']")
        return -1

    # only one field is handled (5/8/2015)
    fii = 0
    field = fields[0]
    if ncfields is None:
        ncfield = field.upper()
    else:
        ncfield = ncfields[fii]

    print('%s %s' % (field, ncfield)) #@@

    # keyword arguments for cnc.getNCvar(); identical for every simulation
    ncparams = {}
    if monbool:
        ncparams['monsel'] = tf
    elif seabool:
        ncparams['seas'] = tf
    if levsel is not None:
        ncparams['levsel'] = levsel
    if meantype == 'zonal':
        ncparams['calc'] = 'zm'

    # assume simulations entered are of the form: E1, R3, ENSE etc. Then
    # filetype input arg tells which simulation to get (or both)
    simdict = {}
    sim = None
    for sim in simulations:
        print(sim) # @@

        fieldpair = con.build_filepathpair(sim, field)
        print(fieldpair[0])

        components = _loaddata_components(sim, field, ncfield, fieldpair)

        if filetype in ('diff', 'pval'):
            ctl = _loaddata_read(components, 'ctl', timesel, ncparams)
            pert = _loaddata_read(components, 'pert', timesel, ncparams)
            fld = pert - ctl
        elif filetype == 'ctl':
            fld = _loaddata_read(components, 'ctl', timesel, ncparams)
        else: # 'pert'
            fld = _loaddata_read(components, 'pert', timesel, ncparams)

        if region is not None:
            # coordinates come from the control file of the first component
            coordfile = components[0][0]
            lat = cnc.getNCvar(coordfile, 'lat')
            lon = cnc.getNCvar(coordfile, 'lon')
            if filetype == 'pval':
                pert = cutl.calc_regmean(pert, lat, lon, region, alsomask=alsomask)
                ctl = cutl.calc_regmean(ctl, lat, lon, region, alsomask=alsomask)
                (tstat, pval) = cutl.ttest_ind(pert, ctl)
                fld = pval
            else:
                fld = cutl.calc_regmean(fld, lat, lon, region, alsomask=alsomask)

        if meantype == 'time':
            if filetype == 'pval':
                # this is an error. filetype supercedes meantype so time avg won't be done
                print('filetype=pval and meantype-time. Ignore meantype and return pvals @@')
            else:
                fld = np.mean(fld, axis=0)

        simdict[sim] = fld

    if rettype == 'ndarray':
        if sim is None:
            # no simulations requested: nothing to stack
            return np.zeros((0,))

        # use the last sim's data to size the array
        initshape = (len(simulations),) + np.shape(simdict[sim])
        tmp = np.zeros(initshape)
        for sii,skey in enumerate(simulations):
            # Ellipsis (not ':') so that one scalar per simulation also works
            tmp[sii, ...] = simdict[skey]
        return tmp

    return simdict
allantcr1=np.zeros(inittime) allantcr2=np.zeros(inittime) allantpr1=np.zeros(inittime) allantpr2=np.zeros(inittime) allantr1=np.zeros(inittime) allantr2=np.zeros(inittime) tallii=0 # index to keep track of time in accumulated TOT ensemble aallii=0 # index to keep track of time in accumulated ANT ensemble for sim in sims: if field1 in ('turb','net'): fielda='hfl'; fieldb='hfs' fnamec,fnamepa=con.build_filepathpair(sim,fielda) fnamecb,fnamepb=con.build_filepathpair(sim,fieldb) fldc = cnc.getNCvar(fnamec,fielda.upper(),timesel=timesel,seas=sea1) +\ cnc.getNCvar(fnamecb,fieldb.upper(),timesel=timesel,seas=sea1) fldp = cnc.getNCvar(fnamepa,fielda.upper(),timesel=timesel,seas=sea1) +\ cnc.getNCvar(fnamepb,fieldb.upper(),timesel=timesel,seas=sea1) if field1=='net': fieldb='flg' conv=-1 fnamecb,fnamepb=con.build_filepathpair(sim,fieldb) fldc = fldc + cnc.getNCvar(fnamecb,fieldb.upper(), timesel=timesel,seas=sea1)*conv fldp = fldp + cnc.getNCvar(fnamepb,fieldb.upper(), timesel=timesel,seas=sea1)*conv