Esempio n. 1
0

    # DEBUG: pull the 'ci' and 'pval' entries out of dblob for inspection
    # (presumably confidence intervals and p-values -- confirm against the
    # code that builds dblob). Neither name is used in the lines shown here.
    ci=dblob['ci']
    pv=dblob['pval']


    # Local import: pd is used below to wrap each ensemble's data in a DataFrame.
    import pandas as pd
    
    

    # TODO: check whether the ensemble means are significantly different.
    # Per-ensemble spread diagnostics are printed first; the comparison
    # between ensembles is delegated to sh.calc_ensemblestats at the end.
    if shadeens != None: # NOTE: per the original author, shadeens is currently never None; revisit this guard

        # Anomaly range vs. mean anomaly for the given field within each
        # ensemble. allensmdt is only referenced by the disabled line below.
        allensdt,allensmdt = con.build_ensembles(shadeens,dblob,calctype='diff')
        for ens in shadeens:
            # One column per key of allensdt[ens] (presumably one column per
            # ensemble member and one row per season -- TODO confirm against
            # con.build_ensembles).
            ensdf=pd.DataFrame(allensdt[ens])
            #ensmdf=pd.DataFrame(allensmdt[ens])
            # Row-wise (axis=1) spread across the columns: max minus min.
            ensrng = ensdf.max(axis=1)-ensdf.min(axis=1)
            # Spread relative to the row-wise mean; printed below scaled by 100.
            ensrat = ensrng / ensdf.mean(axis=1)
            print ens + ' RANGE: '
            print str(ensrng)
            print ens + ' MEAN: '
            print str(ensdf.mean(axis=1))
            print ens + ' RATIO*100: '
            print str(ensrat*100)
            
        # The ensemble comparison is only run when more than one ensemble is
        # given. The return value of sh.calc_ensemblestats is discarded here.
        if len(shadeens)>1:
            sh.calc_ensemblestats(dblob,shadeens,seas=seasons)
    
Esempio n. 2
0
def calc_ensemblestats(datablob,ensnames, seas=None,siglevel=0.05):
    """Compare two ensembles season by season; print and return the stats.

    For every season the members of the first two ensembles in `ensnames`
    are compared with an independent two-sample t-test, a one-way ANOVA
    F-test and Levene's test for equal variances. These tests are NOT done
    in time but across ensemble members, so n is the number of members in
    each ensemble.

           datablob: 'diff' should be scalar regional mean anomalies
           ensnames: a tuple of ensemble names in the datablob.
                     must match ensname in sims dictionary. e.g. histIC, histBC
                     At least two names are required; only the first two
                     are compared.
           seas: 2 or 3 month seasons. Defaults to
                 ('SON','DJF','MAM','JJA') when None.
           siglevel: a test is reported as significant when its p-value
                     is <= siglevel.

           Returns a dict keyed by season. Each value is a dict with keys
               'mean', 'std'    : (ensemble 1, ensemble 2) across members
               'tstat', 'pval'  : sp.stats.ttest_ind result
               'fstat', 'fpval' : sp.stats.f_oneway result
               'lstat', 'lpval' : sp.stats.levene result
    """

    seasons = ('SON','DJF','MAM','JJA') if seas is None else seas

    # build_ensembles also returns the ensemble-mean dict, but the tests
    # below need the individual members only, so it is not used here.
    allensdt, _allensmdt = con.build_ensembles(ensnames, datablob, calctype='diff')

    name1, name2 = ensnames[0], ensnames[1]
    # Rows are looked up by season label below; one column per simulation.
    ensdf1 = pd.DataFrame(allensdt[name1])
    ensdf2 = pd.DataFrame(allensdt[name2])

    # Level quoted in the "significantly different" messages.
    conflabel = str(1-siglevel)

    results = {}
    for sea in seasons:
        # Print the season before the lookups so a failing .loc is easy to place.
        print(sea)

        e1 = ensdf1.loc[sea]
        e2 = ensdf2.loc[sea]

        mean1, std1 = e1.mean(), e1.std()
        mean2, std2 = e2.mean(), e2.std()
        print(name1 + ' MEAN: ' + str(mean1) + ' STD: ' + str(std1))
        print(name2 + ' MEAN: ' + str(mean2) + ' STD: ' + str(std2))

        # Difference of means: two-sample t-test across members.
        tstat, pval = sp.stats.ttest_ind(e1,e2)
        print('TSTAT: ' + str(tstat) + ' PVAL: ' + str(pval))
        if pval<=siglevel:
            print('The ensemble means are significantly different (' + conflabel + ')')

        # Difference of means: one-way ANOVA on the same two groups.
        fstat, fpval = sp.stats.f_oneway(e1,e2)
        print('FSTAT: ' + str(fstat) + ' PVAL: ' + str(fpval))
        if fpval<=siglevel:
            print('The ensemble means are significantly different (' + conflabel + ')')

        # Difference of variances: Levene's test.
        lstat, lpval = sp.stats.levene(e1,e2)
        print('LSTAT: ' + str(lstat) + ' PVAL: ' + str(lpval))
        if lpval<=siglevel:
            print('The ensemble variances are significantly different (' + conflabel + ')')

        results[sea] = {'mean': (mean1, mean2), 'std': (std1, std2),
                        'tstat': tstat, 'pval': pval,
                        'fstat': fstat, 'fpval': fpval,
                        'lstat': lstat, 'lpval': lpval}

    return results