# DEBUG:
ci = dblob['ci']
pv = dblob['pval']

import pandas as pd

# Calculate whether the ensemble means are significantly different. @@@@
# Right now shadeens will never be None. Should change that. @@@
if shadeens is not None:
    # Anomaly range vs. mean anomaly for the given field within each ensemble.
    allensdt, allensmdt = con.build_ensembles(shadeens, dblob, calctype='diff')

    for ens in shadeens:
        ensdf = pd.DataFrame(allensdt[ens])

        # Reduce across columns (axis=1), leaving one value per row label.
        # Presumably columns are the ensemble's simulations -- TODO confirm.
        ensmean = ensdf.mean(axis=1)
        ensrng = ensdf.max(axis=1) - ensdf.min(axis=1)
        ensrat = ensrng / ensmean

        print(ens + ' RANGE: ')
        print(str(ensrng))
        print(ens + ' MEAN: ')
        print(str(ensmean))
        print(ens + ' RATIO*100: ')
        print(str(ensrat * 100))

    # NOTE(review): the original indentation was lost; this comparison is
    # assumed to run once after the loop rather than once per ensemble.
    if len(shadeens) > 1:
        sh.calc_ensemblestats(dblob, shadeens, seas=seasons)
def calc_ensemblestats(datablob, ensnames, seas=None, siglevel=0.05):
    """Compare the first two named ensembles season by season.

    For each season the per-member values of the two ensembles are compared
    with an independent two-sample t-test, a one-way ANOVA and Levene's test.
    The tests are NOT in time but across ensemble members (e.g. n=5 for TOT
    and ANT). Results are printed and also returned.

    datablob: 'diff' should be scalar regional mean anomalies
    ensnames: a tuple of ensemble names in the datablob. must match
              ensname in sims dictionary. e.g. histIC, histBC
              Only ensnames[0] and ensnames[1] are compared.
    seas: 2 or 3 month seasons. Defaults to ('SON','DJF','MAM','JJA').
    siglevel: p-value threshold at or below which a difference is reported
              as significant.

    Returns a dict keyed by season. Each value is a dict with:
        'mean', 'std'    -- (ensemble 1, ensemble 2) tuples across members
        'tstat', 'pval'  -- scipy.stats.ttest_ind result
        'fstat', 'fpval' -- scipy.stats.f_oneway result
        'lstat', 'lpval' -- scipy.stats.levene result
    """
    if seas is None:
        seasons = ('SON', 'DJF', 'MAM', 'JJA')
    else:
        seasons = seas

    # Need all members of each ensemble to test between the ensembles.
    # The ensemble-mean dict returned alongside is not used here.
    allensdt, _allensmdt = con.build_ensembles(ensnames, datablob,
                                               calctype='diff')

    name1, name2 = ensnames[0], ensnames[1]
    ensdf1 = pd.DataFrame(allensdt[name1])
    ensdf2 = pd.DataFrame(allensdt[name2])

    confidence = str(1 - siglevel)
    results = {}

    for sea in seasons:
        # One row per season label; the row's values span the DataFrame columns.
        e1 = ensdf1.loc[sea]
        e2 = ensdf2.loc[sea]

        mean1, std1 = e1.mean(), e1.std()
        mean2, std2 = e2.mean(), e2.std()

        print(sea)
        print(name1 + ' MEAN: ' + str(mean1) + ' STD: ' + str(std1))
        print(name2 + ' MEAN: ' + str(mean2) + ' STD: ' + str(std2))

        tstat, pval = sp.stats.ttest_ind(e1, e2)
        print('TSTAT: ' + str(tstat) + ' PVAL: ' + str(pval))
        if pval <= siglevel:
            print('The ensemble means are significantly different ('
                  + confidence + ')')

        fstat, fpval = sp.stats.f_oneway(e1, e2)
        print('FSTAT: ' + str(fstat) + ' PVAL: ' + str(fpval))
        if fpval <= siglevel:
            print('The ensemble means are significantly different ('
                  + confidence + ')')

        lstat, lpval = sp.stats.levene(e1, e2)
        print('LSTAT: ' + str(lstat) + ' PVAL: ' + str(lpval))
        if lpval <= siglevel:
            print('The ensemble variances are significantly different ('
                  + confidence + ')')

        results[sea] = {
            'mean': (mean1, mean2),
            'std': (std1, std2),
            'tstat': tstat,
            'pval': pval,
            'fstat': fstat,
            'fpval': fpval,
            'lstat': lstat,
            'lpval': lpval,
        }

    return results