def levene(*args, **kwds):
    """
    Perform Levene test for equal variances.

    The Levene test tests the null hypothesis that all input samples
    are from populations with equal variances.  Levene's test is an
    alternative to Bartlett's test `bartlett` in the case where
    there are significant deviations from normality.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        The sample data, possibly with different lengths
    center : {'mean', 'median', 'trimmed'}, optional
        Which function of the data to use in the test.  The default
        is 'median'.
    proportiontocut : float, optional
        When `center` is 'trimmed', this gives the proportion of data points
        to cut from each end. (See `scipy.stats.trim_mean`.)
        Default is 0.05.

    Returns
    -------
    W : float
        The test statistic.
    p-value : float
        The p-value for the test.

    Notes
    -----
    Three variations of Levene's test are possible.  The possibilities
    and their recommended usages are:

      * 'median' : Recommended for skewed (non-normal) distributions.
      * 'mean' : Recommended for symmetric, moderate-tailed distributions.
      * 'trimmed' : Recommended for heavy-tailed distributions.

    References
    ----------
    .. [1] http://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm
    .. [2] Levene, H. (1960). In Contributions to Probability and Statistics:
           Essays in Honor of Harold Hotelling, I. Olkin et al. eds.,
           Stanford University Press, pp. 278-292.
    .. [3] Brown, M. B. and Forsythe, A. B. (1974), Journal of the American
           Statistical Association, 69, 364-367

    """
    # Handle keyword arguments.
    center = 'median'
    proportiontocut = 0.05
    for kw, value in kwds.items():
        if kw not in ['center', 'proportiontocut']:
            raise TypeError("levene() got an unexpected keyword argument '%s'" % kw)
        if kw == 'center':
            center = value
        else:
            proportiontocut = value

    k = len(args)
    if k < 2:
        raise ValueError("Must enter at least two input sample vectors.")
    if center not in ['mean', 'median', 'trimmed']:
        # NOTE: a space was missing between 'median' and "or" in the old
        # concatenated message.
        raise ValueError("Keyword argument <center> must be 'mean', 'median'"
                         " or 'trimmed'.")

    if center == 'median':
        func = lambda x: np.median(x, axis=0)
    elif center == 'mean':
        func = lambda x: np.mean(x, axis=0)
    else:  # center == 'trimmed'
        args = tuple(stats.trimboth(arg, proportiontocut) for arg in args)
        func = lambda x: np.mean(x, axis=0)

    # Per-group sizes and centers.
    Ni = np.zeros(k)
    Yci = np.zeros(k, 'd')
    for j in range(k):
        Ni[j] = len(args[j])
        Yci[j] = func(args[j])
    Ntot = np.sum(Ni, axis=0)

    # Zij = |X_ij - center_i|: absolute deviations from each group's center.
    Zij = [None] * k
    for i in range(k):
        Zij[i] = abs(np.asarray(args[i]) - Yci[i])

    # Group means of the deviations, and their grand (size-weighted) mean.
    Zbari = np.zeros(k, 'd')
    Zbar = 0.0
    for i in range(k):
        Zbari[i] = np.mean(Zij[i], axis=0)
        Zbar += Zbari[i] * Ni[i]
    Zbar /= Ntot

    # Between-group variability of the deviations.
    numer = (Ntot - k) * np.sum(Ni * (Zbari - Zbar)**2, axis=0)

    # Within-group sum of squares of the deviations.
    dvar = 0.0
    for i in range(k):
        dvar += np.sum((Zij[i] - Zbari[i])**2, axis=0)
    denom = (k - 1.0) * dvar

    W = numer / denom
    pval = distributions.f.sf(W, k - 1, Ntot - k)  # 1 - cdf
    return W, pval
def fligner(*args, **kwds):
    """
    Perform Fligner's test for equal variances.

    Fligner's test tests the null hypothesis that all input samples
    are from populations with equal variances.  Fligner's test is
    non-parametric in contrast to Bartlett's test `bartlett` and
    Levene's test `levene`.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        arrays of sample data.  Need not be the same length
    center : {'mean', 'median', 'trimmed'}, optional
        keyword argument controlling which function of the data
        is used in computing the test statistic.  The default
        is 'median'.
    proportiontocut : float, optional
        When `center` is 'trimmed', this gives the proportion of data points
        to cut from each end. (See `scipy.stats.trim_mean`.)
        Default is 0.05.

    Returns
    -------
    Xsq : float
        the test statistic
    p-value : float
        the p-value for the hypothesis test

    Notes
    -----
    As with Levene's test there are three variants
    of Fligner's test that differ by the measure of central
    tendency used in the test.  See `levene` for more information.

    References
    ----------
    .. [1] http://www.stat.psu.edu/~bgl/center/tr/TR993.ps
    .. [2] Fligner, M.A. and Killeen, T.J. (1976). Distribution-free two-sample
           tests for scale. 'Journal of the American Statistical Association.'
           71(353), 210-213.

    """
    # Handle keyword arguments.
    center = 'median'
    proportiontocut = 0.05
    for kw, value in kwds.items():
        if kw not in ['center', 'proportiontocut']:
            raise TypeError("fligner() got an unexpected keyword argument '%s'" % kw)
        if kw == 'center':
            center = value
        else:
            proportiontocut = value

    k = len(args)
    if k < 2:
        raise ValueError("Must enter at least two input sample vectors.")
    if center not in ['mean', 'median', 'trimmed']:
        # NOTE: a space was missing between 'median' and "or" in the old
        # concatenated message.
        raise ValueError("Keyword argument <center> must be 'mean', 'median'"
                         " or 'trimmed'.")

    if center == 'median':
        func = lambda x: np.median(x, axis=0)
    elif center == 'mean':
        func = lambda x: np.mean(x, axis=0)
    else:  # center == 'trimmed'
        args = tuple(stats.trimboth(arg, proportiontocut) for arg in args)
        func = lambda x: np.mean(x, axis=0)

    Ni = np.asarray([len(args[j]) for j in range(k)])
    Yci = np.asarray([func(args[j]) for j in range(k)])
    Ntot = np.sum(Ni, axis=0)

    # Zij = |X_ij - center_i|, pooled across samples; g[i]:g[i+1] are the
    # boundaries of sample i inside the pooled vector.
    Zij = [abs(np.asarray(args[i]) - Yci[i]) for i in range(k)]
    allZij = []
    g = [0]
    for i in range(k):
        allZij.extend(list(Zij[i]))
        g.append(len(allZij))

    # Normal scores of the pooled ranks.
    ranks = stats.rankdata(allZij)
    a = distributions.norm.ppf(ranks / (2 * (Ntot + 1.0)) + 0.5)

    # Mean score within each sample (explicit per-group slice sums replace
    # the former _apply_func(a, g, sum) helper — identical result).
    Aibar = np.asarray([np.sum(a[g[i]:g[i + 1]]) for i in range(k)]) / Ni
    anbar = np.mean(a, axis=0)
    varsq = np.var(a, axis=0, ddof=1)

    Xsq = np.sum(Ni * (Aibar - anbar)**2.0, axis=0) / varsq
    pval = distributions.chi2.sf(Xsq, k - 1)  # 1 - cdf
    return Xsq, pval
# Smoke-test output for the variability and trimming helpers.
# Converted from Python 2 `print` statements to the Python 3 print()
# function, matching the already-converted copy of this script elsewhere
# in the file.
print(stats.obrientransform(a, a, a, a, a))
print('samplevar:', stats.samplevar(l), stats.samplevar(a))
print('samplestdev:', stats.samplestdev(l), stats.samplestdev(a))
print('var:', stats.var(l), stats.var(a))
print('stdev:', stats.stdev(l), stats.stdev(a))
print('sterr:', stats.sterr(l), stats.sterr(a))
print('sem:', stats.sem(l), stats.sem(a))
print('z:', stats.z(l, 4), stats.z(a, 4))
print('zs:')
print(stats.zs(l))
print(stats.zs(a))
print('\nTRIMMING')
print('trimboth:')
print(stats.trimboth(l, .2))
print(stats.trimboth(lf, .2))
print(stats.trimboth(a, .2))
print(stats.trimboth(af, .2))
print('trim1:')
print(stats.trim1(l, .2))
print(stats.trim1(lf, .2))
print(stats.trim1(a, .2))
print(stats.trim1(af, .2))
print('\nCORRELATION')
# execfile() no longer exists in Python 3; the equivalent would be
# exec(open('testpairedstats.py').read())
l = range(1, 21)
a = N.array(l)
ll = [l] * 5
# Exercise the variability helpers on list and array inputs, then the
# trimming helpers, printing each result in turn.
print(stats.obrientransform(l, l, l, l, l))
print(stats.obrientransform(a, a, a, a, a))
for label, fn in (('samplevar:', stats.samplevar),
                  ('samplestdev:', stats.samplestdev),
                  ('var:', stats.var),
                  ('stdev:', stats.stdev),
                  ('sterr:', stats.sterr),
                  ('sem:', stats.sem)):
    print(label, fn(l), fn(a))
print('z:', stats.z(l, 4), stats.z(a, 4))
print('zs:')
print(stats.zs(l))
print(stats.zs(a))

print('\nTRIMMING')
print('trimboth:')
for seq in (l, lf, a, af):
    print(stats.trimboth(seq, .2))
print('trim1:')
for seq in (l, lf, a, af):
    print(stats.trim1(seq, .2))

print('\nCORRELATION')
# execfile('testpairedstats.py')
l = range(1, 21)
a = N.array(l)
ll = [l] * 5
aa = N.array(ll)
# Smoke-test output for the variability and trimming helpers.
# Converted from Python 2 `print` statements to the Python 3 print()
# function, matching the already-converted copy of this script elsewhere
# in the file.
print(stats.obrientransform(a, a, a, a, a))
print('samplevar:', stats.samplevar(l), stats.samplevar(a))
print('samplestdev:', stats.samplestdev(l), stats.samplestdev(a))
print('var:', stats.var(l), stats.var(a))
print('stdev:', stats.stdev(l), stats.stdev(a))
print('sterr:', stats.sterr(l), stats.sterr(a))
print('sem:', stats.sem(l), stats.sem(a))
print('z:', stats.z(l, 4), stats.z(a, 4))
print('zs:')
print(stats.zs(l))
print(stats.zs(a))
print('\nTRIMMING')
print('trimboth:')
print(stats.trimboth(l, .2))
print(stats.trimboth(lf, .2))
print(stats.trimboth(a, .2))
print(stats.trimboth(af, .2))
print('trim1:')
print(stats.trim1(l, .2))
print(stats.trim1(lf, .2))
print(stats.trim1(a, .2))
print(stats.trim1(af, .2))
print('\nCORRELATION')
# execfile() no longer exists in Python 3; the equivalent would be
# exec(open('testpairedstats.py').read())
l = range(1, 21)
a = N.array(l)
ll = [l] * 5