def _test(actual1, actual2, model, iters=1000):
    """
    Estimates a two-tailed p-value for the observed difference in means.

    The absolute difference in means between the two groups is compared
    against a distribution with mean 0 and variance
    ``var(model) * (1/n + 1/m)``, where n and m are the group sizes.
    Intermediate quantities are printed to stdout along the way.  As a
    sanity check, `iters` resampled deltas are also drawn from `model`
    and their (mean, variance) is printed; this does not affect the
    returned value.

    Args:
        actual1:
        actual2: sequences of observed values for groups 1 and 2
        model:   sequences of values from the hypothetical distribution
        iters:   number of resampled deltas to generate for the
                 printed comparison

    Returns:
        The sum of the left-tail and right-tail probabilities of the
        observed absolute delta.
    """
    n, m = len(actual1), len(actual2)

    # observed difference in means; only its magnitude is used below
    mu1, mu2, signed_delta = hypothesis._difference_in_mean(actual1, actual2)
    delta = abs(signed_delta)

    for label, value in (
        ('n:', n),
        ('m:', m),
        ('mu1', mu1),
        ('mu2', mu2),
        ('delta', delta),
    ):
        print(label, value)

    # expected distribution of differences in sample mean under the model
    pooled_stats = _03_thinkstats._mean_var(model)
    mu_pooled, var_pooled = pooled_stats
    print('(Mean, Var) of pooled data', mu_pooled, var_pooled)

    mu = 0
    var = (1.0 / n + 1.0 / m) * var_pooled
    print('Expected Mean, Var of deltas', mu, var)

    # two-tailed probability of a delta at least this extreme
    sigma = math.sqrt(var)
    left = _16_erf._normal_cdf(-delta, mu, sigma)
    right = 1 - _16_erf._normal_cdf(delta, mu, sigma)
    pvalue = left + right
    print('Tails:', left, right)
    print('p-value:', pvalue)

    # empirical check: (mean, var) of deltas resampled from the model
    # NOTE(review): presumably _resample draws n and m values from its
    # two sequence arguments and returns their mean difference -- confirm.
    deltas = []
    for _ in range(iters):
        deltas.append(hypothesis._resample(model, model, n, m))
    print('(Mean, Var) of resampled deltas', _03_thinkstats._mean_var(deltas))

    return pvalue
# Example #2
# 0
def _render_normal_cdf(mu, sigma, max, n=50):
    """Generates sequences of xs and ps for a normal CDF."""
    xs = [max * i / n for i in range(n)]
    ps = [_16_erf._normal_cdf(x, mu, sigma) for x in xs]
    return xs, ps