Beispiel #1
0
def pointbiserialr(x:List(float),y:List(float))->(float,float):
    """
    Calculates a point-biserial correlation coefficient and the associated
    probability value.  Taken from Heiman's Basic Statistics for the Behav.
    Sci (1st), p.194.

    x is the grouping variable and must contain exactly two distinct values
    (as reported by pstat.unique); y holds the paired scores.

    Usage:   pointbiserialr(x,y)      where x,y are equal-length lists
    Returns: Point-biserial r, two-tailed p-value
    Raises:  ValueError if x and y differ in length, or if x does not
             contain exactly two categories.
    """
    TINY = 1e-30  # keeps the t denominator non-zero when rpb is exactly +/-1
    if len(x) != len(y):
        raise ValueError('INPUT VALUES NOT PAIRED IN pointbiserialr.  ABORTING.')
    data = pstat.abut(x,y)
    categories = pstat.unique(x)
    if len(categories) != 2:
        raise ValueError("Exactly 2 categories required for pointbiserialr().")

    # Split the paired rows by the value found in column 0 (the category).
    first_group = pstat.linexand(data,0,categories[0])
    second_group = pstat.linexand(data,0,categories[1])
    # Column 1 of each row is averaged per group (presumably the y scores).
    first_mean = central_tendency.mean(pstat.colex(first_group,1))
    second_mean = central_tendency.mean(pstat.colex(second_group,1))
    n = len(data)
    # sqrt(p*q): the proportions of rows falling into each category.
    adjust = sqrt((len(first_group)/float(n))*(len(second_group)/float(n)))
    rpb = (second_mean - first_mean)/variability.samplestdev(pstat.colex(data,1))*adjust
    df = n-2
    t = rpb*sqrt(df/((1.0-rpb+TINY)*(1.0+rpb+TINY)))
    prob = probability.betai(0.5*df,0.5,df/(df+t*t))  # t already a float
    return rpb, prob
Beispiel #2
0
def linregress(x:List(float),y:List(float))->(float,float,float,float,float):
    """
    Calculates a regression line on x,y pairs.

    Usage:   linregress(x,y)      x,y are equal-length lists of x-y coordinates
    Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate
    Raises:  ValueError if x and y differ in length.
    """
    TINY = 1.0e-20  # keeps the t denominator non-zero when r is exactly +/-1
    if len(x) != len(y):
        raise ValueError('Input values not paired in linregress.  Aborting.')
    n = len(x)
    x = list(map(float,x))
    y = list(map(float,y))
    xmean = central_tendency.mean(x)
    ymean = central_tendency.mean(y)
    r_num = float(n*(support.summult(x,y)) - sum(x)*sum(y))
    # The x term is shared by the correlation denominator and the slope,
    # so it is computed once.
    x_term = n*support.ss(x) - support.square_of_sums(x)
    y_term = n*support.ss(y) - support.square_of_sums(y)
    r_den = sqrt(x_term*y_term)
    r = r_num / r_den
    df = n-2
    t = r*sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
    prob = probability.betai(0.5*df,0.5,df/(df+t*t))
    slope = r_num / float(x_term)
    intercept = ymean - slope*xmean
    sterrest = sqrt(1-r*r)*variability.samplestdev(y)
    return slope, intercept, r, prob, sterrest
def obrientransform(args: List(List(float))) -> List(List(float)):
    """
    Computes a transform on input data (any number of columns).  Used to
    test for homogeneity of variance prior to running one-way stats.  From
    Maxwell and Delaney, p.112.

    The input lists are not modified; new lists are returned.

    Usage:   obrientransform(args)      args is a list of lists of scores
    Returns: transformed data for use in an ANOVA
    Raises:  ValueError if, for any group, the mean of the transformed
             scores differs from that group's variance by more than 1e-10.
    """
    TINY = 1e-10
    transformed = []
    for group in args:
        size = len(group)
        scores = [float(score) for score in group]
        variance = var(scores)
        centre = central_tendency.mean(scores)

        # Terms that do not depend on the individual score.
        weight = (size - 1.5) * size
        t2 = 0.5 * variance * (size - 1.0)
        t3 = (size - 1.0) * (size - 2.0)
        new_scores = [(weight * (score - centre)**2 - t2) / float(t3)
                      for score in scores]

        # The transformed scores must average to the group's variance.
        # Compare the absolute difference so a deviation in either
        # direction is caught.
        if abs(variance - central_tendency.mean(new_scores)) > TINY:
            raise ValueError('Problem in obrientransform.')
        transformed.append(new_scores)
    return transformed
def ttest_ind(a: List(float), b: List(float)) -> (float, float):
    """
    Calculates the t-obtained T-test on TWO INDEPENDENT samples of
    scores a, and b.  From Numerical Recipes, p.483.

    This version takes no printing options; it only returns the statistics.

    Usage:   ttest_ind(a,b)
    Returns: t-value, two-tailed prob
    """
    x1 = central_tendency.mean(a)
    x2 = central_tendency.mean(b)
    v1 = variability.stdev(a)**2
    v2 = variability.stdev(b)**2
    n1 = len(a)
    n2 = len(b)
    df = n1 + n2 - 2
    # Pooled variance: each sample's variance weighted by (size - 1).
    svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / float(df)
    t = (x1 - x2) / sqrt(svar * (1.0 / n1 + 1.0 / n2))
    prob = probability.betai(0.5 * df, 0.5, df / (df + t * t))
    return t, prob
def ttest_1samp(a: List(float), popmean: int) -> (float, float):
    """
    Calculates the t-obtained for the single-sample T-test on ONE group
    of scores a, given a population mean.

    This version takes no printing options; it only returns the statistics.

    Usage:   ttest_1samp(a,popmean)
    Returns: t-value, two-tailed prob
    """
    x = central_tendency.mean(a)
    v = variability.var(a)
    n = len(a)
    df = n - 1
    # df equals n - 1, so this is v up to floating-point rounding; the
    # expression is kept as-is so results stay numerically identical.
    svar = ((n - 1) * v) / float(df)
    t = (x - popmean) / sqrt(svar * (1.0 / n))
    prob = probability.betai(0.5 * df, 0.5, float(df) / (df + t * t))
    return t, prob
Beispiel #6
0
def variation(inlist: List(float)) -> float:
    """
    Returns the coefficient of variation (100 * samplestdev / mean), as
    defined in CRC Standard Probability and Statistics, p.6.

    Usage:   variation(inlist)
    """
    spread = variability.samplestdev(inlist)
    centre = float(central_tendency.mean(inlist))
    return 100.0 * spread / centre
def z(inlist: List(float), score: float) -> float:
    """
    Returns the z-score of score relative to inlist: its distance from the
    mean of inlist divided by samplestdev(inlist).  Not appropriate for
    population calculations.

    Usage:   z(inlist, score)
    """
    offset = score - central_tendency.mean(inlist)
    return offset / samplestdev(inlist)
def samplevar(inlist: List(float)) -> float:
    """
    Returns the variance of the values in the passed list using
    N for the denominator (i.e., DESCRIBES the sample variance only).

    Usage:   samplevar(inlist)
    """
    count = len(inlist)
    centre = central_tendency.mean(inlist)
    # Deviation of every value from the mean.
    offsets = [value - centre for value in inlist]
    return support.ss(offsets) / float(count)
def var(inlist: List(float)) -> float:
    """
    Returns the variance of the values in the passed list using N-1
    for the denominator (i.e., for estimating population variance).

    Usage:   var(inlist)
    """
    count = len(inlist)
    centre = central_tendency.mean(inlist)
    # Deviation of every value from the mean.
    offsets = [value - centre for value in inlist]
    return support.ss(offsets) / float(count - 1)
def cov(x: List(float), y: List(float)) -> float:
    """
    Returns the estimated covariance (N-1 denominator) of the paired
    values in x and y.

    Usage:   cov(x,y)      where x and y are equal-length lists
    Raises:  ValueError if x and y differ in length.
    """
    # Unpaired input would otherwise either raise IndexError (short y) or
    # silently return a value computed with a mean that includes unpaired
    # elements (long y).
    if len(x) != len(y):
        raise ValueError('Input values not paired in cov.  Aborting.')
    n = len(x)
    xmn = central_tendency.mean(x)
    ymn = central_tendency.mean(y)
    # Sum of the products of paired deviations from the means.
    ss = 0.0
    for xi, yi in zip(x, y):
        ss = ss + (xi - xmn) * (yi - ymn)
    return ss / float(n - 1)
Beispiel #11
0
def describe(inlist: List(float)) -> (int, (float, float), float, float, float,
                                      float):
    """
    Returns some descriptive statistics of the passed list (assumed to be 1D).

    Usage:   describe(inlist)
    Returns: n, (min, max), mean, standard deviation, skew, kurtosis
    """
    return (
        len(inlist),
        (min(inlist), max(inlist)),
        central_tendency.mean(inlist),
        variability.stdev(inlist),
        skew(inlist),
        kurtosis(inlist),
    )
Beispiel #12
0
def pearsonr(x:List(float),y:List(float))->(float,float):
    """
    Calculates a Pearson correlation coefficient and the associated
    probability value.  Taken from Heiman's Basic Statistics for the Behav.
    Sci (2nd), p.195.

    Usage:   pearsonr(x,y)      where x and y are equal-length lists
    Returns: Pearson's r value, two-tailed p-value
    Raises:  ValueError if x and y differ in length.
    """
    TINY = 1.0e-30  # keeps the t denominator non-zero when r is exactly +/-1
    if len(x) != len(y):
        raise ValueError('Input values not paired in pearsonr.  Aborting.',x,y)
    n = len(x)
    x = list(map(float,x))
    y = list(map(float,y))
    r_num = n*(support.summult(x,y)) - sum(x)*sum(y)
    x_term = n*support.ss(x) - support.square_of_sums(x)
    y_term = n*support.ss(y) - support.square_of_sums(y)
    r_den = sqrt(x_term*y_term)
    r = (r_num / r_den)  # denominator already a float
    df = n-2
    t = r*sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
    prob = probability.betai(0.5*df,0.5,df/float(df+t*t))
    return r, prob
def ttest_rel(a: List(float), b: List(float)) -> (float, float):
    """
    Calculates the t-obtained T-test on TWO RELATED samples of scores,
    a and b.  From Numerical Recipes, p.483.

    This version takes no printing options; it only returns the statistics.

    Usage:   ttest_rel(a,b)      where a and b are equal-length lists
    Returns: t-value, two-tailed prob
    Raises:  ValueError if a and b differ in length.
    """
    if len(a) != len(b):
        raise ValueError('Unequal length lists in ttest_rel.')
    x1 = central_tendency.mean(a)
    x2 = central_tendency.mean(b)
    v1 = variability.var(a)
    v2 = variability.var(b)
    n = len(a)
    # Sum of the products of paired deviations from the two means.
    cross = 0
    for ai, bi in zip(a, b):
        cross = cross + (ai - x1) * (bi - x2)
    df = n - 1
    covariance = cross / float(df)
    # Standard error of the mean difference between the paired samples.
    sd = sqrt((v1 + v2 - 2.0 * covariance) / float(n))
    t = (x1 - x2) / sd
    prob = probability.betai(0.5 * df, 0.5, df / (df + t * t))
    return t, prob
Beispiel #14
0
def moment(inlist: List(float), moment: int) -> float:
    """
    Calculates the nth moment about the mean for a sample.  Used to
    calculate coefficients of skewness and kurtosis.

    Usage:   moment(inlist,moment)
    Returns: appropriate moment (r) from ... 1/n * SUM((inlist(i)-mean)**r)
    """
    # The first moment is returned as 0.0 without inspecting the data.
    if moment == 1:
        return 0.0

    centre = central_tendency.mean(inlist)
    count = len(inlist)
    total = 0
    for value in inlist:
        total += (value - centre)**moment
    return total / float(count)
Beispiel #15
0
# Demo/driver fragment: builds two float lists and prints the results of the
# central-tendency and moment routines for them.
# NOTE(review): `t` and LIST_SIZE are defined outside the lines shown here;
# `t` is presumably a timing context manager - confirm.
with t:
    # Floats 1.0, 2.0, ... up to LIST_SIZE - 1.
    l = list(map(float, range(1, LIST_SIZE)))
    lf = list(map(float, range(1, LIST_SIZE)))
    # Index 2 already holds 3.0 (for LIST_SIZE > 3), so lf keeps the same
    # values as l after this assignment.
    lf[2] = 3.0
    # Five references to the same list object (not copies); not used in the
    # lines shown here.
    ll = [l] * 5

    # Each statistic below is evaluated twice for l and twice for lf
    # (mode: twice for l only).
    print('\nCENTRAL TENDENCY')
    print('geometricmean:', central_tendency.geometricmean(l),
          central_tendency.geometricmean(lf),
          central_tendency.geometricmean(l),
          central_tendency.geometricmean(lf))
    print('harmonicmean:', central_tendency.harmonicmean(l),
          central_tendency.harmonicmean(lf), central_tendency.harmonicmean(l),
          central_tendency.harmonicmean(lf))
    print('mean:', central_tendency.mean(l), central_tendency.mean(lf),
          central_tendency.mean(l), central_tendency.mean(lf))
    print('median:', central_tendency.median(l), central_tendency.median(lf),
          central_tendency.median(l), central_tendency.median(lf))
    print('medianscore:', central_tendency.medianscore(l),
          central_tendency.medianscore(lf), central_tendency.medianscore(l),
          central_tendency.medianscore(lf))
    print('mode:', central_tendency.mode(l), central_tendency.mode(l))

    # Here `moment` is used as a module exposing moment/variation/skew.
    print('\nMOMENTS')
    print('moment:', moment.moment(l, 2), moment.moment(lf, 2),
          moment.moment(l, 2), moment.moment(lf, 2))
    # Note the argument order differs from the other lines: l, l, lf, lf.
    print('variation:', moment.variation(l), moment.variation(l),
          moment.variation(lf), moment.variation(lf))
    print('skew:', moment.skew(l), moment.skew(lf), moment.skew(l),
          moment.skew(lf))
 def testMeanResult(self):
     # The mean of sample1 must be exactly 5.5.
     self.assertEqual(5.5, central_tendency.mean(sample1))
     # The mean of sample7 must lie within 0.01 of 13.78.  The absolute
     # difference is compared so that a mean that is too large fails as
     # well as one that is too small.
     self.assertLess(abs(13.78 - central_tendency.mean(sample7)), 0.01)
 def testMeanResult(self):
     # The mean of sample1 must be exactly 5.5.
     self.assertEqual(5.5, central_tendency.mean(sample1))
     # The mean of sample7 must lie within 0.01 of 13.78.  The absolute
     # difference is compared so that a mean that is too large fails as
     # well as one that is too small.
     self.assertLess(abs(13.78 - central_tendency.mean(sample7)), 0.01)