コード例 #1
0
ファイル: lib.py プロジェクト: miken/cep_rr_analyses
def gh(sample_a, sample_b, **kwargs):
    '''
    Run a Games-Howell pairwise comparison on two samples.

    sample_a and sample_b must provide ``mean()``, ``var()`` and
    ``count()`` methods (``count()`` suggests pandas Series; confirm
    against the callers).

    Keyword arguments:
        r: forwarded unchanged as the second argument of ``psturng``.
           It is read with ``kwargs.get``, so it is None when omitted.

    Returns a tuple ``(mean_diff, p)`` where ``mean_diff`` is
    ``mean(sample_a) - mean(sample_b)`` and ``p`` is whatever
    ``psturng(q, r, df)`` returns (presumably the p-value of the
    studentized range statistic; psturng is imported outside this block).
    '''
    # Retrieve argument(s)
    r = kwargs.get('r')

    # Games-Howell does not pool variances: each sample contributes its
    # own variance-over-count term to the standard error and to the df.
    mean_a = sample_a.mean()
    count_a = sample_a.count()
    s2n_a = sample_a.var() / count_a

    mean_b = sample_b.mean()
    count_b = sample_b.count()
    s2n_b = sample_b.var() / count_b

    # Use a float literal: ``(1/2)`` is 0 under Python 2 integer division,
    # which would zero the standard error and make q divide by zero.
    standard_error = sqrt(0.5 * (s2n_a + s2n_b))
    mean_diff = mean_a - mean_b
    q = abs(mean_diff) / standard_error

    # Welch-Satterthwaite approximation of the degrees of freedom
    df_numer = (s2n_a + s2n_b)**2
    df_denom = (s2n_a**2 / (count_a - 1)) + (s2n_b**2 / (count_b - 1))
    df = df_numer / df_denom

    p = psturng(q, r, df)
    return mean_diff, p
コード例 #2
0
    def test_1000_random_values(self):
        """Round-trip 1000 random (p, r, v) triples through qsturng/psturng.

        For each triple, q = qsturng(p, r, v) is computed and
        psturng(q, r, v) is compared with 1 - p.
        """
        n = 1000
        ps = np.random.random(n)*(.999 - .1) + .1
        # random_integers is deprecated; randint excludes its upper bound,
        # so 101 keeps r in the same inclusive range [2, 100].
        rs = np.random.randint(2, 101, n)
        vs = np.random.random(n)*998. + 2.
        qs = qsturng(ps, rs, vs)
        estimates = psturng(qs, rs, vs)
        actuals = 1. - ps
        errors = estimates - actuals

        # NOTE(review): only positive errors above 1e-5 are rejected;
        # confirm that negative deviations are meant to be ignored.
        assert_equal(np.array([]), np.where(errors > 1e-5)[0])
コード例 #3
0
 def test_vector(self):
     """Vector input yields vector output."""
     # Expected psturng results for the three q values below,
     # compared to 5 decimals.
     expected = np.array([0.10679889, 0.06550009, 0.01730145])
     q_values = [3.98832389, 4.56835318, 6.26400894]
     r_values = [4, 4, 4]
     v_values = [6, 6, 6]
     observed = psturng(q_values, r_values, v_values)
     assert_array_almost_equal(expected, observed, 5)
コード例 #4
0
    def test_handful_to_known_values(self):
        """Check psturng against 20 stored (p, r, v, q) rows.

        For every row, psturng(q, r, v) must equal 1 - p to 5 decimals.
        """
        # Columns: p, r, v, q
        known_rows = (
            (0.71499578726111435, 67, 956.70742488392386, 5.0517658443070692),
            (0.42974234855067672, 16, 723.50261736502318, 3.3303582093701354),
            (0.94936429359548424, 2, 916.1867328010926, 2.7677975546417244),
            (0.85357381770725038, 66, 65.67055060832368, 5.5647438108270109),
            (0.87372108021900929, 74, 626.42369474993632, 5.5355540570701107),
            (0.53891960564713726, 49, 862.63799438485785, 4.5108645923377146),
            (0.98818659555664567, 18, 36.269686711464274, 6.0906643750886156),
            (0.53031994896037626, 50, 265.29558652727917, 4.5179640079726795),
            (0.7318857887397332, 59, 701.41497552251201, 4.9980139875409915),
            (0.65332019368982697, 61, 591.01183664195912, 4.8706581766706893),
            (0.55403221657248558, 77, 907.34156725405194, 4.8786135917984632),
            (0.30783916857266003, 83, 82.446923487980882, 4.4396401242858294),
            (0.29321720242415661, 16, 709.64382575553009, 3.0304277540702729),
            (0.27146478168880306, 31, 590.00594683574172, 3.5870031664477215),
            (0.67348796958433776, 81, 608.02706111127657, 5.1096199974432936),
            (0.32774393945968938, 18, 17.706224399250839, 3.2119038163765432),
            (0.7081637474795982, 72, 443.10678914889695, 5.0990030889410649),
            (0.33354939276757861, 47, 544.0772192199048, 4.0613352964193279),
            (0.60412143947363051, 36, 895.83526933271548, 4.381717596850172),
            (0.88739052300665977, 77, 426.03665511558262, 5.6333929480341309),
        )

        for row in known_rows:
            prob, r, v, q = row
            expected = 1. - prob
            assert_almost_equal(expected, psturng(q, r, v), 5)
コード例 #5
0
ファイル: lib.py プロジェクト: miken/cep_rr_analyses
def tukey(sample_a, sample_b, **kwargs):
    '''
    Calculate Tukey's HSD and its significance from two samples.

    sample_a and sample_b must provide ``mean()`` and ``count()``
    methods (``count()`` suggests pandas Series; confirm against the
    callers).

    Keyword arguments (each read with ``kwargs.get``, so None if omitted):
        msw: Mean Squares Within
        r: number of samples in total
        df: degrees of freedom - the sum of (count of each sample - 1)

    Returns a tuple ``(mean_diff, p)`` where ``mean_diff`` is
    ``mean(sample_a) - mean(sample_b)`` and ``p`` is whatever
    ``psturng(q, r, df)`` returns (presumably the p-value of the
    studentized range statistic; psturng is imported outside this block).
    '''
    # Retrieve arguments
    msw = kwargs.get('msw')
    r = kwargs.get('r')
    df = kwargs.get('df')

    mean_a = sample_a.mean()
    count_a = sample_a.count()
    mean_b = sample_b.mean()
    count_b = sample_b.count()

    # Float literals keep this true division: under Python 2, ``(1/2)`` and
    # ``1/count`` on integers evaluate to 0 and collapse the standard
    # error to 0.
    standard_error = sqrt(msw * 0.5 * (1.0 / count_a + 1.0 / count_b))
    mean_diff = mean_a - mean_b
    q = abs(mean_diff) / standard_error
    p = psturng(q, r, df)
    return mean_diff, p
コード例 #6
0
 def test_v_equal_one(self):
     """psturng(.2, 5, 1) must equal .1 to 5 decimals (case v == 1)."""
     q, r, v = .2, 5, 1
     observed = psturng(q, r, v)
     assert_almost_equal(.1, observed, 5)
コード例 #7
0
 def test_scalar(self):
     """Scalar input yields scalar output."""
     # With scalar arguments the result is compared against .1 to 5 decimals.
     observed = psturng(4.43645545899562, 5, 6)
     assert_almost_equal(.1, observed, 5)
コード例 #8
0
import math
import time
import numpy as np
import pylab

from qsturng import qsturng, psturng, v_keys
from qsturng.make_tbls import R

import pprint

# Draw n random (p, r, v) triples and convert them to q with qsturng.
n = 100
ps = np.random.random(n) * (.999 - .1) + .1
# random_integers is deprecated; randint excludes its upper bound,
# so 101 keeps r in the same inclusive range [2, 100].
rs = np.random.randint(2, 101, n)
vs = np.random.random(n) * 998. + 2.
qs = qsturng(ps, rs, vs)

# Time only the psturng call; the original stopped the clock after the
# pprint below, so the reported time also included the printing.
t0 = time.time()
estimates = psturng(qs, rs, vs)
elapsed = time.time() - t0

pprint.pprint(list(zip(ps, rs, vs, qs)))

# Function-call form of print works on both Python 2 and Python 3.
print(elapsed)

# psturng(qsturng(p, r, v), r, v) is compared with 1 - p.
actuals = 1. - ps
errors = estimates - actuals

# Histogram of the errors with TeX-formatted tick labels.
pylab.figure()
pylab.hist(errors, bins=100)
yticks = pylab.yticks()[0]
pylab.yticks(yticks, [r'$%i$' % t for t in yticks])
xticks = pylab.xticks()[0]
pylab.xticks(xticks, [r'$%.0e$' % t for t in xticks])
pylab.text(0,
コード例 #9
0
import math
import time
import numpy as np
import pylab

from qsturng import qsturng, psturng, v_keys
from qsturng.make_tbls import R

import pprint

# Draw n random (p, r, v) triples and convert them to q with qsturng.
n = 100
ps = np.random.random(n) * (.999 - .1) + .1
# random_integers is deprecated; randint excludes its upper bound,
# so 101 keeps r in the same inclusive range [2, 100].
rs = np.random.randint(2, 101, n)
vs = np.random.random(n) * 998. + 2.
qs = qsturng(ps, rs, vs)

# Time only the psturng call; the original stopped the clock after the
# pprint below, so the reported time also included the printing.
t0 = time.time()
estimates = psturng(qs, rs, vs)
elapsed = time.time() - t0

pprint.pprint(list(zip(ps, rs, vs, qs)))

# Function-call form of print works on both Python 2 and Python 3.
print(elapsed)

# psturng(qsturng(p, r, v), r, v) is compared with 1 - p.
actuals = 1. - ps
errors = estimates - actuals

# Histogram of the errors with TeX-formatted tick labels.
pylab.figure()
pylab.hist(errors, bins=100)
yticks = pylab.yticks()[0]
pylab.yticks(yticks, [r'$%i$' % t for t in yticks])
xticks = pylab.xticks()[0]
pylab.xticks(xticks, [r'$%.0e$' % t for t in xticks])
pylab.text(0, 475,