コード例 #1
0
def ttest_ind(a: List(float), b: List(float)) -> (float, float):
    """
Independent-samples t-test on two lists of scores, a and b, using a
pooled variance estimate with len(a) + len(b) - 2 degrees of freedom.
(The original author's note cites Numerical Recipes, p.483.)

The reporting options (printit, name1, name2, writemode) are not
arguments of this function; they are fixed inside the body, and with
printit set to 0 the call to outputpairedstats is never made.

Usage:   ttest_ind(a,b)
Returns: t-value, two-tailed prob
"""
    # Reporting settings, hard-wired instead of being optional arguments.
    printit, writemode = 0, 'a'
    name1, name2 = 'Samp1', 'Samp2'

    mean_a = central_tendency.mean(a)
    mean_b = central_tendency.mean(b)
    var_a = variability.stdev(a) ** 2
    var_b = variability.stdev(b) ** 2
    n1, n2 = len(a), len(b)
    dof = n1 + n2 - 2

    # Pooled variance: each sample's variance weighted by (size - 1).
    pooled_var = ((n1 - 1) * var_a + (n2 - 1) * var_b) / float(dof)
    std_err = sqrt(pooled_var * (1.0 / n1 + 1.0 / n2))
    t = (mean_a - mean_b) / std_err

    # The probability comes from betai evaluated at dof / (dof + t^2).
    beta_x = dof / (dof + t * t)
    prob = probability.betai(0.5 * dof, 0.5, beta_x)

    if printit != 0:
        outputpairedstats(
            printit, writemode,
            name1, n1, mean_a, var_a, min(a), max(a),
            name2, n2, mean_b, var_b, min(b), max(b),
            'Independent samples T-test.', t, prob)
    return t, prob
コード例 #2
0
def ttest_1samp(a: List(float), popmean: int) -> (float, float):
    """
Single-sample t-test: compares the mean of the scores in a against a
given population mean, popmean, with len(a) - 1 degrees of freedom.

The reporting options (printit, name, writemode) are not arguments of
this function; they are fixed inside the body, and with printit set to 0
the call to outputpairedstats is never made.

Usage:   ttest_1samp(a,popmean)
Returns: t-value, two-tailed prob
"""
    # Reporting settings, hard-wired instead of being optional arguments.
    printit, name, writemode = 0, 'Sample', 'a'

    sample_mean = central_tendency.mean(a)
    sample_var = variability.var(a)
    n = len(a)
    dof = n - 1

    # Kept in the original's form: (n - 1) * var divided by the dof.
    scaled_var = (n - 1) * sample_var
    svar = scaled_var / float(dof)
    std_err = sqrt(svar * (1.0 / n))
    t = (sample_mean - popmean) / std_err

    beta_x = float(dof) / (dof + t * t)
    prob = probability.betai(0.5 * dof, 0.5, beta_x)

    if printit != 0:
        outputpairedstats(
            printit, writemode,
            'Population', '--', popmean, 0, 0, 0,
            name, n, sample_mean, sample_var, min(a), max(a),
            'Single-sample T-test.', t, prob)
    return t, prob
コード例 #3
0
def pointbiserialr(x:List(float),y:List(float))->(float,float):
    """
Point-biserial correlation between a two-category variable x and a list
of scores y, together with its probability value.  (The original
author's note cites Heiman's Basic Statistics for the Behav. Sci (1st),
p.194.)

Usage:   pointbiserialr(x,y)      where x,y are equal-length lists
Returns: Point-biserial r, two-tailed p-value
Raises:  ValueError if the lists differ in length or x does not contain
         exactly 2 categories (as reported by pstat.unique).
"""
    TINY = 1e-30  # keeps the t denominator non-zero when rpb is exactly +/-1
    if len(x) != len(y):
        raise ValueError('INPUT VALUES NOT PAIRED IN pointbiserialr.  ABORTING.')

    pairs = pstat.abut(x,y)
    groups = pstat.unique(x)
    if len(groups) != 2:
        raise ValueError("Exactly 2 categories required for pointbiserialr().")

    # The recoded table is built exactly as before but its result is never
    # read afterwards; the call is retained so behaviour stays unchanged.
    code_table = pstat.abut(groups, [0, 1])
    pstat.recode(pairs, code_table, 0)

    # Presumably selects the rows whose column 0 equals each category
    # (helper semantics are not visible here -- confirm against pstat).
    first_rows = pstat.linexand(pairs, 0, groups[0])
    second_rows = pstat.linexand(pairs, 0, groups[1])
    first_mean = central_tendency.mean(pstat.colex(first_rows, 1))
    second_mean = central_tendency.mean(pstat.colex(second_rows, 1))

    n = len(pairs)
    share_first = len(first_rows) / float(n)
    share_second = len(second_rows) / float(n)
    adjust = sqrt(share_first * share_second)

    spread = variability.samplestdev(pstat.colex(pairs, 1))
    rpb = (second_mean - first_mean) / spread * adjust

    dof = n - 2
    t = rpb * sqrt(dof / ((1.0 - rpb + TINY) * (1.0 + rpb + TINY)))
    prob = probability.betai(0.5 * dof, 0.5, dof / (dof + t * t))
    return rpb, prob
コード例 #4
0
def linregress(x:List(float),y:List(float))->(float,float,float,float,float):
    """
Calculates a regression line on x,y pairs.

Both inputs are converted to lists of floats before any arithmetic.  The
correlation r is clamped to [-1, 1]: values marginally outside that range
can only come from floating-point rounding, and would otherwise make the
square roots below receive a negative argument.

Usage:   linregress(x,y)      x,y are equal-length lists of x-y coordinates
Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate
Raises:  ValueError if x and y differ in length.
"""
    TINY = 1.0e-20  # keeps the t denominator non-zero when r is exactly +/-1
    if len(x) != len(y):
        raise ValueError('Input values not paired in linregress.  Aborting.')
    n = len(x)
    x = list(map(float,x))
    y = list(map(float,y))
    xmean = central_tendency.mean(x)
    ymean = central_tendency.mean(y)

    # n*ss(x) - square_of_sums(x) is needed for both r and the slope, so it
    # is computed once.
    x_term = n*support.ss(x) - support.square_of_sums(x)
    y_term = n*support.ss(y) - support.square_of_sums(y)
    r_num = float(n*(support.summult(x,y)) - sum(x)*sum(y))
    r_den = sqrt(x_term*y_term)
    r = r_num / r_den
    # Explicit comparisons (rather than min/max) leave a NaN untouched.
    if r > 1.0:
        r = 1.0
    elif r < -1.0:
        r = -1.0

    df = n-2
    t = r*sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
    prob = probability.betai(0.5*df,0.5,df/(df+t*t))
    slope = r_num / float(x_term)
    intercept = ymean - slope*xmean
    sterrest = sqrt(1-r*r)*variability.samplestdev(y)
    return slope, intercept, r, prob, sterrest
コード例 #5
0
def spearmanr(x:List(float),y:List(float))->(float,float):
    """
Calculates a Spearman rank-order correlation coefficient.  (The original
author's note cites Heiman's Basic Statistics for the Behav. Sci (1st),
p.192.)

Both inputs are ranked with rankdata, and rs is derived from the value
returned by sumdiffsquared for the two rank lists.  A perfect correlation
(rs == 1 or rs == -1) is handled: TINY keeps the denominator of the
t-statistic from becoming exactly zero.

Usage:   spearmanr(x,y)      where x and y are equal-length lists
Returns: Spearman's r, two-tailed p-value
Raises:  ValueError if x and y differ in length.
"""
    TINY = 1e-30
    if len(x) != len(y):
        raise ValueError('Input values not paired in spearmanr.  Aborting.')
    n = len(x)
    rankx = rankdata(x)
    ranky = rankdata(y)
    dsq = sumdiffsquared(rankx,ranky)
    rs = 1 - 6*dsq / float(n*(n**2-1))
    df = n-2
    # Without TINY, rs == +/-1 makes (rs+1)*(1-rs) zero and the division
    # raises ZeroDivisionError.  TINY is added after the subtraction /
    # addition with 1.0 so it is not absorbed by rounding.
    t = rs * sqrt(df / ((rs+1.0+TINY)*(1.0-rs+TINY)))
    probrs = probability.betai(0.5*df,0.5,df/(df+t*t))  # t already a float
# probability values for rs are from part 2 of the spearman function in
# Numerical Recipes, p.510.  They are close to tables, but not exact. (?)
    return rs, probrs
コード例 #6
0
def ttest_rel(a: List(float), b: List(float)) -> (float, float):
    """
Related-samples (paired) t-test on two equal-length lists of scores,
a and b, with len(a) - 1 degrees of freedom.  (The original author's
note cites Numerical Recipes, p.483.)

The reporting options (printit, name1, name2, writemode) are not
arguments of this function; they are fixed inside the body, and with
printit set to 0 the call to outputpairedstats is never made.

Usage:   ttest_rel(a,b)
Returns: t-value, two-tailed prob
Raises:  ValueError if a and b differ in length.
"""
    # Reporting settings, hard-wired instead of being optional arguments.
    printit, writemode = 0, 'a'
    name1, name2 = 'Sample1', 'Sample2'

    if len(a) != len(b):
        raise ValueError('Unequal length lists in ttest_rel.')

    mean_a = central_tendency.mean(a)
    mean_b = central_tendency.mean(b)
    var_a = variability.var(a)
    var_b = variability.var(b)
    n = len(a)

    # Sum of cross-products of deviations, accumulated pair by pair.
    cross = 0
    for score_a, score_b in zip(a, b):
        cross += (score_a - mean_a) * (score_b - mean_b)

    dof = n - 1
    covariance = cross / float(dof)
    std_err = sqrt((var_a + var_b - 2.0 * covariance) / float(n))
    t = (mean_a - mean_b) / std_err
    beta_x = dof / (dof + t * t)
    prob = probability.betai(0.5 * dof, 0.5, beta_x)

    if printit != 0:
        outputpairedstats(
            printit, writemode,
            name1, n, mean_a, var_a, min(a), max(a),
            name2, n, mean_b, var_b, min(b), max(b),
            'Related samples T-test.', t, prob)
    return t, prob
コード例 #7
0
def pearsonr(x:List(float),y:List(float))->(float,float):
    """
Calculates a Pearson correlation coefficient and the associated
probability value.  (The original author's note cites Heiman's Basic
Statistics for the Behav. Sci (2nd), p.195.)

Both inputs are converted to lists of floats before any arithmetic.  The
correlation r is clamped to [-1, 1]: values marginally outside that range
can only come from floating-point rounding, and would otherwise make the
square root in the t-statistic receive a negative argument.

Usage:   pearsonr(x,y)      where x and y are equal-length lists
Returns: Pearson's r value, two-tailed p-value
Raises:  ValueError if x and y differ in length.
"""
    TINY = 1.0e-30  # keeps the t denominator non-zero when r is exactly +/-1
    if len(x) != len(y):
        raise ValueError('Input values not paired in pearsonr.  Aborting.',x,y)
    n = len(x)
    x = list(map(float,x))
    y = list(map(float,y))
    r_num = n*(support.summult(x,y)) - sum(x)*sum(y)
    r_den = sqrt((n*support.ss(x) - support.square_of_sums(x))*(n*support.ss(y)-support.square_of_sums(y)))
    r = (r_num / r_den)  # denominator already a float
    # Explicit comparisons (rather than min/max) leave a NaN untouched.
    if r > 1.0:
        r = 1.0
    elif r < -1.0:
        r = -1.0
    df = n-2
    t = r*sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
    prob = probability.betai(0.5*df,0.5,df/float(df+t*t))
    return r, prob