예제 #1
0
def levene(*args,**kwds):
    """
    Perform Levene test for equal variances.

    The Levene test tests the null hypothesis that all input samples
    are from populations with equal variances.  Levene's test is an
    alternative to Bartlett's test `bartlett` in the case where
    there are significant deviations from normality.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        The sample data, possibly with different lengths
    center : {'mean', 'median', 'trimmed'}, optional
        Which function of the data to use in the test.  The default
        is 'median'.
    proportiontocut : float, optional
        When `center` is 'trimmed', this gives the proportion of data points
        to cut from each end. (See `scipy.stats.trim_mean`.)
        Default is 0.05.

    Returns
    -------
    W : float
        The test statistic.
    p-value : float
        The p-value for the test.

    Raises
    ------
    TypeError
        If a keyword argument other than `center` or `proportiontocut`
        is given.
    ValueError
        If fewer than two samples are given, or if `center` is not one of
        'mean', 'median' or 'trimmed'.

    Notes
    -----
    Three variations of Levene's test are possible.  The possibilities
    and their recommended usages are:

      * 'median' : Recommended for skewed (non-normal) distributions.
      * 'mean' : Recommended for symmetric, moderate-tailed distributions.
      * 'trimmed' : Recommended for heavy-tailed distributions.

    When `center` is 'trimmed', the group sizes used in the statistic are
    the sizes of the trimmed samples.

    References
    ----------
    .. [1]  http://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm
    .. [2]   Levene, H. (1960). In Contributions to Probability and Statistics:
               Essays in Honor of Harold Hotelling, I. Olkin et al. eds.,
               Stanford University Press, pp. 278-292.
    .. [3]  Brown, M. B. and Forsythe, A. B. (1974), Journal of the American
              Statistical Association, 69, 364-367

    """
    # Handle keyword arguments: only `center` and `proportiontocut` are
    # accepted; anything left over is reported like a normal signature error.
    center = kwds.pop('center', 'median')
    proportiontocut = kwds.pop('proportiontocut', 0.05)
    if kwds:
        raise TypeError("levene() got an unexpected keyword argument '%s'"
                        % next(iter(kwds)))

    k = len(args)
    if k < 2:
        raise ValueError("Must enter at least two input sample vectors.")

    if center not in ('mean', 'median', 'trimmed'):
        raise ValueError("Keyword argument <center> must be 'mean', 'median'"
                         " or 'trimmed'.")

    if center == 'median':
        func = lambda x: np.median(x, axis=0)
    elif center == 'mean':
        func = lambda x: np.mean(x, axis=0)
    else:  # center == 'trimmed'
        # Sort before trimming so that the extreme values are the ones cut
        # even if `trimboth` only slices positionally (as older SciPy
        # releases document); sorting is harmless when it already orders
        # the data itself.
        args = tuple(stats.trimboth(np.sort(arg, axis=0), proportiontocut)
                     for arg in args)
        func = lambda x: np.mean(x, axis=0)

    # Per-group size and centre (median, mean or trimmed mean).
    Ni = np.zeros(k)
    Yci = np.zeros(k, 'd')
    for j in range(k):
        Ni[j] = len(args[j])
        Yci[j] = func(args[j])
    Ntot = np.sum(Ni, axis=0)

    # Zij: absolute deviation of every observation from its group centre.
    Zij = [abs(np.asarray(args[i]) - Yci[i]) for i in range(k)]

    # Zbari: mean deviation per group; Zbar: size-weighted overall mean.
    Zbari = np.zeros(k, 'd')
    for i in range(k):
        Zbari[i] = np.mean(Zij[i], axis=0)
    Zbar = np.sum(Zbari * Ni, axis=0) / Ntot

    # Between-group variability of the deviations.
    numer = (Ntot - k) * np.sum(Ni * (Zbari - Zbar)**2, axis=0)

    # Within-group variability of the deviations.
    dvar = 0.0
    for i in range(k):
        dvar += np.sum((Zij[i] - Zbari[i])**2, axis=0)

    denom = (k - 1.0) * dvar

    W = numer / denom
    pval = distributions.f.sf(W, k - 1, Ntot - k)  # 1 - cdf
    return W, pval
예제 #2
0
def fligner(*args,**kwds):
    """
    Perform Fligner's test for equal variances.

    Fligner's test tests the null hypothesis that all input samples
    are from populations with equal variances.  Fligner's test is
    non-parametric in contrast to Bartlett's test `bartlett` and
    Levene's test `levene`.

    Parameters
    ----------
    sample1, sample2, ... : array_like
        arrays of sample data.  Need not be the same length
    center : {'mean', 'median', 'trimmed'}, optional
        keyword argument controlling which function of the data
        is used in computing the test statistic.  The default
        is 'median'.
    proportiontocut : float, optional
        When `center` is 'trimmed', this gives the proportion of data points
        to cut from each end. (See `scipy.stats.trim_mean`.)
        Default is 0.05.

    Returns
    -------
    Xsq : float
        the test statistic
    p-value : float
        the p-value for the hypothesis test

    Raises
    ------
    TypeError
        If a keyword argument other than `center` or `proportiontocut`
        is given.
    ValueError
        If fewer than two samples are given, or if `center` is not one of
        'mean', 'median' or 'trimmed'.

    Notes
    -----
    As with Levene's test there are three variants
    of Fligner's test that differ by the measure of central
    tendency used in the test.  See `levene` for more information.

    References
    ----------
    .. [1] http://www.stat.psu.edu/~bgl/center/tr/TR993.ps

    .. [2] Fligner, M.A. and Killeen, T.J. (1976). Distribution-free two-sample
           tests for scale. 'Journal of the American Statistical Association.'
           71(353), 210-213.

    """
    # Handle keyword arguments: only `center` and `proportiontocut` are
    # accepted; anything left over is reported like a normal signature error.
    center = kwds.pop('center', 'median')
    proportiontocut = kwds.pop('proportiontocut', 0.05)
    if kwds:
        raise TypeError("fligner() got an unexpected keyword argument '%s'"
                        % next(iter(kwds)))

    k = len(args)
    if k < 2:
        raise ValueError("Must enter at least two input sample vectors.")

    if center not in ('mean', 'median', 'trimmed'):
        raise ValueError("Keyword argument <center> must be 'mean', 'median'"
                         " or 'trimmed'.")

    if center == 'median':
        func = lambda x: np.median(x, axis=0)
    elif center == 'mean':
        func = lambda x: np.mean(x, axis=0)
    else:  # center == 'trimmed'
        # Sort before trimming so that the extreme values are the ones cut
        # even if `trimboth` only slices positionally (as older SciPy
        # releases document); sorting is harmless when it already orders
        # the data itself.
        args = tuple(stats.trimboth(np.sort(arg, axis=0), proportiontocut)
                     for arg in args)
        func = lambda x: np.mean(x, axis=0)

    # Per-group size and centre (median, mean or trimmed mean).
    Ni = np.asarray([len(sample) for sample in args])
    Yci = np.asarray([func(sample) for sample in args])
    Ntot = np.sum(Ni, axis=0)

    # Zij: absolute deviation of every observation from its group centre.
    Zij = [abs(np.asarray(args[i]) - Yci[i]) for i in range(k)]

    # Pool all deviations into one list; g records the running offsets
    # (starting at 0) at which each group ends within the pooled list.
    allZij = []
    g = [0]
    for i in range(k):
        allZij.extend(list(Zij[i]))
        g.append(len(allZij))

    # Rank the pooled deviations and map the ranks to normal scores.
    ranks = stats.rankdata(allZij)
    a = distributions.norm.ppf(ranks / (2 * (Ntot + 1.0)) + 0.5)

    # compute Aibar
    # NOTE(review): `_apply_func` is defined elsewhere in this file;
    # presumably it applies the given function to each group's slice of `a`
    # as delimited by `g` -- confirm against its definition.
    Aibar = _apply_func(a, g, np.sum) / Ni
    anbar = np.mean(a, axis=0)
    varsq = np.var(a, axis=0, ddof=1)
    Xsq = np.sum(Ni * (np.asarray(Aibar) - anbar)**2.0, axis=0) / varsq
    pval = distributions.chi2.sf(Xsq, k - 1)  # 1 - cdf
    return Xsq, pval
예제 #3
0
print stats.obrientransform(a,a,a,a,a)

print 'samplevar:',stats.samplevar(l),stats.samplevar(a)
print 'samplestdev:',stats.samplestdev(l),stats.samplestdev(a)
print 'var:',stats.var(l),stats.var(a)
print 'stdev:',stats.stdev(l),stats.stdev(a)
print 'sterr:',stats.sterr(l),stats.sterr(a)
print 'sem:',stats.sem(l),stats.sem(a)
print 'z:',stats.z(l,4),stats.z(a,4)
print 'zs:'
print stats.zs(l)
print stats.zs(a)

print '\nTRIMMING'
print 'trimboth:'
print stats.trimboth(l,.2)
print stats.trimboth(lf,.2)
print stats.trimboth(a,.2)
print stats.trimboth(af,.2)
print 'trim1:'
print stats.trim1(l,.2)
print stats.trim1(lf,.2)
print stats.trim1(a,.2)
print stats.trim1(af,.2)

print '\nCORRELATION'
#execfile('testpairedstats.py')

l = range(1,21)
a = N.array(l)
ll = [l]*5
예제 #4
0
# Script fragment: prints the results of a series of `stats` functions for
# the sequences defined earlier in the script.
# NOTE(review): the multi-argument print() calls below assume Python 3 (or
# `from __future__ import print_function`); under a plain Python 2
# interpreter they would print tuples instead.
print(stats.obrientransform(l, l, l, l, l))
print(stats.obrientransform(a, a, a, a, a))

# Variability measures, each evaluated on `l` and on `a`.
print('samplevar:', stats.samplevar(l), stats.samplevar(a))
print('samplestdev:', stats.samplestdev(l), stats.samplestdev(a))
print('var:', stats.var(l), stats.var(a))
print('stdev:', stats.stdev(l), stats.stdev(a))
print('sterr:', stats.sterr(l), stats.sterr(a))
print('sem:', stats.sem(l), stats.sem(a))
print('z:', stats.z(l, 4), stats.z(a, 4))
print('zs:')
print(stats.zs(l))
print(stats.zs(a))

# Trimming functions, each applied with a proportion of .2.
print('\nTRIMMING')
print('trimboth:')
print(stats.trimboth(l, .2))
print(stats.trimboth(lf, .2))
print(stats.trimboth(a, .2))
print(stats.trimboth(af, .2))
print('trim1:')
print(stats.trim1(l, .2))
print(stats.trim1(lf, .2))
print(stats.trim1(a, .2))
print(stats.trim1(af, .2))
print('\nCORRELATION')
# execfile('testpairedstats.py')

# Rebind the fixtures.  `range` is wrapped in `list` so that `l` is a real
# list on Python 3 as well (where `range` returns a lazy range object);
# on Python 2 the extra `list` call just copies the list.
l = list(range(1, 21))
a = N.array(l)
ll = [l] * 5  # five references to the same list `l`
aa = N.array(ll)
예제 #5
0
print stats.obrientransform(a, a, a, a, a)

print 'samplevar:', stats.samplevar(l), stats.samplevar(a)
print 'samplestdev:', stats.samplestdev(l), stats.samplestdev(a)
print 'var:', stats.var(l), stats.var(a)
print 'stdev:', stats.stdev(l), stats.stdev(a)
print 'sterr:', stats.sterr(l), stats.sterr(a)
print 'sem:', stats.sem(l), stats.sem(a)
print 'z:', stats.z(l, 4), stats.z(a, 4)
print 'zs:'
print stats.zs(l)
print stats.zs(a)

print '\nTRIMMING'
print 'trimboth:'
print stats.trimboth(l, .2)
print stats.trimboth(lf, .2)
print stats.trimboth(a, .2)
print stats.trimboth(af, .2)
print 'trim1:'
print stats.trim1(l, .2)
print stats.trim1(lf, .2)
print stats.trim1(a, .2)
print stats.trim1(af, .2)

print '\nCORRELATION'
# execfile('testpairedstats.py')

l = range(1, 21)
a = N.array(l)
ll = [l] * 5