nt = fldsel.shape[0] nlon=fldsel.shape[2] nlat=fldsel.shape[1] fldre = fldsel.reshape((nt,nlon*nlat)) xx=np.arange(0,nt) #ensseldt[eii] = fldsel if field=='sic': ensnhdt[eii],ensshdt[eii] = cutl.calc_totseaicearea(fld/100.,lat,lon,isarea=False) fldsel=cutl.calc_seaicearea(fldsel,lat,lon) fldre=fldsel.reshape((nt,nlon*nlat)) else: ensgmdt[eii] = cutl.global_mean_areawgted3d(fld,lat,lon) ensrmdt[eii] = cutl.calc_regmean(fld,lat,lon,region) #slope[eii], intercept, r_value, p_value, std_err = sp.stats.linregress(xx,dat) # not good for 3d data? # this is just the second timesel (ie 2002-2012) slope,intercept = np.polyfit(xx,fldre,1) # supposedly can do w/ higher dims? enstrnddt[eii] = slope #.reshape((nlat,nlon)) # reshape later @@ # also save all trends into one dictionary (don't differentiate by seed/base run) alltrnddt[superii] = slope superii+=1 if field=='sic': simnhdt[sim] = pd.DataFrame(ensnhdt,index=years)
if field1=='sia': fldcreg=cutl.calc_regtotseaicearea(fldc,lat,lon,region1) # isarea=False fldpreg=cutl.calc_regtotseaicearea(fldp,lat,lon,region1) # isarea=False else: # IF FLUX, MASK OUT OPEN-WATER: #print 'Implement masking out of open water in control for FLUXES @@@' if field1 in ('hfl','hfs','flg','fsg','turb','net'): print 'field1, ' + field1 + ' is a flux. Mask out non-ice in control' # @@ # mask out regions that are not ice in the control (as P&M 2014 JClim) sicfname,junk=con.build_filepathpair(sim,'sicn') sicnc = cnc.getNCvar(sicfname,'SICN',timesel=timesel,seas=sea1) fldc = ma.masked_where(sicnc<.10,fldc) fldp = ma.masked_where(sicnc<.10,fldp) fldcreg=cutl.calc_regmean(fldc,lat,lon,region1) fldpreg=cutl.calc_regmean(fldp,lat,lon,region1) flddregdt[sim] = np.mean(fldpreg-fldcreg,axis=0) # time mean if sea2 in ('DJF','NDJ') and sea1 not in ('DJF','NDJ'): # have to shorten other timeseries flddregtsdt[sim] = fldpreg[:-1,...]-np.mean(fldcreg[:-1,...],axis=0) # anomaly timeseries from ctl mean else: flddregtsdt[sim] = fldpreg-np.mean(fldcreg,axis=0) if (field2 != field1) or (sea2 != sea1): print field2 + ' != ' + field1 + ' or ' + str(sea2) + ' != ' + str(sea1) if field2 in ('turb','net'): fielda='hfl'; fieldb='hfs' fnamec,fnamepa=con.build_filepathpair(sim,fielda)
def _field_terms(field, ncfield):
    """Return the (field, ncvar, sign) terms that are combined to build *field*.

    'turb' is read as HFL + HFS and 'net' as HFL + HFS - FLG; every other
    field is a single term read under its own nc variable name.
    """
    if field in ('turb', 'net'):
        terms = [('hfl', 'HFL', 1), ('hfs', 'HFS', 1)]
        if field == 'net':
            terms.append(('flg', 'FLG', -1))
        return terms
    return [(field, ncfield, 1)]


def _read_terms(fnames, terms, timesel, ncparams):
    """Read one variable per (fname, term) pair and combine them by sign."""
    total = None
    for fname, (_, ncvar, sign) in zip(fnames, terms):
        data = cnc.getNCvar(fname, ncvar, timesel=timesel, **ncparams)
        if total is None:
            total = data
        elif sign > 0:
            total = total + data
        else:
            total = total - data
    return total


def loaddata(fields, simulations, ncfields=None, model='CanAM4', timeper='001-121', timefreq=None,
             levsel=None, meantype=None, filetype='diff', region=None, alsomask=None, rettype='dict'):
    """Load one field from several CanAM4 simulations.

    fields:      tuple of field names. Only fields[0] is used (a tuple is
                 still required).
    simulations: sequence of simulation names; passed with the field name to
                 con.build_filepathpair() to get the (control, perturbed)
                 file pair.
    ncfields:    tuple of variable names as stored in the files. Default is
                 the upper-cased field name.
    model:       only 'CanAM4' is supported.
    timeper:     currently unused; the time selection is fixed to
                 '0002-01-01,0121-12-31' (the first year is skipped).
    timefreq:    None (default), 'monthly' or 'seasonal': no month/season
                 selection is passed to the reader.
                 1..12: select that month.
                 'climo','ANN','DJF','JJA','NDJ','MAM','SON','ND','JF','SO':
                 passed to the reader as the season selection.
    levsel:      level selection handed to the reader (default: none).
    meantype:    'time' averages over axis 0 (time is assumed to be the first
                 dimension); 'zonal' asks the reader for calc='zm'.
    filetype:    'diff' (perturbed minus control, default), 'ctl', 'pert' or
                 'pval'.
    region:      region name handed to cutl.calc_regmean(). Default None.
    alsomask:    forwarded to cutl.calc_regmean(); only used when region is
                 given.
    rettype:     'dict' (default) returns {simulation: data}; 'ndarray'
                 stacks the per-simulation data along a new leading axis in
                 the order of `simulations`.

    The fields 'turb' and 'net' are assembled from several files:
    turb = HFL + HFS, net = HFL + HFS - FLG.

    Returns the dict/ndarray described under rettype, or -1 (after printing
    a message) if the model or filetype is not supported.
    Raises ValueError if timefreq is not one of the values listed above.
    """
    # All prints use the single-argument parenthesised form so that they
    # print the same text under Python 2 and also parse under Python 3.
    if model != 'CanAM4':
        print('model not supported!')
        return -1

    print('@@ probably should invert the order such that it is field, season, sim?')

    timesel = '0002-01-01,0121-12-31'

    seabool = False  # True if the requested time freq is one season or climo
    monbool = False  # True if the requested time freq is one month
    if timefreq is None:
        # Documented default: no time-frequency selection. Previously `tf`
        # was left unassigned here and the function crashed.
        tf = None
    elif timefreq == 'monthly':  # all months
        tf = con.get_mon()
    elif timefreq == 'seasonal':  # all 4 seasons
        tf = 'DJF', 'MAM', 'JJA', 'SON'
    elif timefreq in range(1, 13):  # an individual month
        tf = timefreq
        monbool = True
    elif timefreq in ('climo', 'ANN', 'DJF', 'JJA', 'NDJ', 'MAM', 'SON', 'ND', 'JF', 'SO'):
        tf = timefreq
        seabool = True
    else:
        raise ValueError('timefreq not supported: %r' % (timefreq,))

    print(tf)  # @@

    # Only a single field is handled.
    fii = 0
    field = fields[0]
    ncfield = field.upper() if ncfields is None else ncfields[fii]
    print('%s %s' % (field, ncfield))  # @@

    # Reader keyword arguments are the same for every simulation.
    ncparams = {}
    if monbool:
        ncparams = {'monsel': tf}
    elif seabool:
        ncparams = {'seas': tf}
    if levsel is not None:
        ncparams['levsel'] = levsel
    if meantype == 'zonal':
        ncparams['calc'] = 'zm'

    terms = _field_terms(field, ncfield)
    isflux = field in ('turb', 'net')

    # Simulation names are of the form E1, R3, ENSE etc.; filetype decides
    # whether the control run, the perturbed run, or both are read.
    simdict = {}
    for sim in simulations:
        print(sim)  # @@

        fnamec, fnamep = con.build_filepathpair(sim, field)
        print(fnamec)

        if filetype not in ('diff', 'pval', 'ctl', 'pert'):
            print("filetype not supported! ['diff'|'ctl'|'pert'|'pval']")
            return -1

        if isflux:
            ctlfiles = []
            pertfiles = []
            for termfield, _, _ in terms:
                termc, termp = con.build_filepathpair(sim, termfield)
                ctlfiles.append(termc)
                pertfiles.append(termp)
            # lat/lon below are read from the first ('hfl') control file.
            fnamec = ctlfiles[0]
        else:
            ctlfiles = [fnamec]
            pertfiles = [fnamep]

        ctl = pert = None
        if filetype in ('diff', 'pval'):
            ctl = _read_terms(ctlfiles, terms, timesel, ncparams)
            pert = _read_terms(pertfiles, terms, timesel, ncparams)
            fld = pert - ctl
        elif filetype == 'ctl':
            fld = _read_terms(ctlfiles, terms, timesel, ncparams)
        else:  # 'pert'
            fld = _read_terms(pertfiles, terms, timesel, ncparams)

        # NOTE(review): with filetype='pval' and region=None no t-test is
        # done and pert - ctl is returned -- confirm whether that is intended.
        if region is not None:
            lat = cnc.getNCvar(fnamec, 'lat')
            lon = cnc.getNCvar(fnamec, 'lon')
            if filetype == 'pval':
                pert = cutl.calc_regmean(pert, lat, lon, region, alsomask=alsomask)
                ctl = cutl.calc_regmean(ctl, lat, lon, region, alsomask=alsomask)
                (tstat, pval) = cutl.ttest_ind(pert, ctl)
                fld = pval
            else:
                fld = cutl.calc_regmean(fld, lat, lon, region, alsomask=alsomask)

        if meantype == 'time':
            if filetype == 'pval':
                # filetype supersedes meantype, so no time average is taken.
                print('filetype=pval and meantype-time. Ignore meantype and return pvals @@')
            else:
                fld = np.mean(fld, axis=0)

        simdict[sim] = fld

    if rettype == 'ndarray':
        if not simdict:
            # No simulations requested: nothing to stack.
            return np.zeros((0,))
        # The last simulation's data fixes the per-simulation shape.
        initshape = (len(simulations),) + np.shape(simdict[sim])
        tmp = np.zeros(initshape)
        for sii, skey in enumerate(simulations):
            # Ellipsis (not ':') so scalar per-simulation results also work.
            tmp[sii, ...] = simdict[skey]
        return tmp

    return simdict
fldc = flddt["iga"] fldcclim, std = cutl.climatologize(fldc) # climo mean (iga) fldclimdt["iga"] = fldcclim for skey in sims[2:10]: # Group I fld = flddt[skey] # timeseries flddiffdt[skey] = fldc2x - fld # climos fldclimdt[skey], std = cutl.climatologize(fld) flddiffclimdt[skey] = fldc2xclim - fldclimdt[skey] rmse = np.sqrt(np.square(flddiffdt[skey])) rmseclim = np.sqrt(np.square(flddiffclimdt[skey])) rmsedt[skey] = cutl.calc_regmean(rmse, lat, lon, region) rmseclimdt[skey] = cutl.calc_regmean(rmseclim, lat, lon, region) annrmseclimdt[skey] = cutl.annualize_monthlyts(rmseclimdt[skey]) climrmsedt[skey], rmsestd = cutl.climatologize(rmsedt[skey]) for skey in sims[10:]: # Group II fld = flddt[skey] # timeseries flddiffdt[skey] = fldc - fld # climos fldclimdt[skey], std = cutl.climatologize(fld) flddiffclimdt[skey] = fldcclim - fldclimdt[skey] rmse = np.sqrt(np.square(flddiffdt[skey]))