コード例 #1
0
ファイル: correlation.py プロジェクト: jengowong/think-stats
def _coef_determination(ys, res):
    """
    Returns R^2, the coefficient of determination, for a set of residuals.

    The value is one minus the ratio of the variance of the residuals to
    the variance of the dependent variable.

    Args:
        ys:  values of the dependent variable
        res: residuals

    Returns:
        the coefficient of determination
    """
    # only the variances are needed; the means returned alongside are ignored
    _, var_ys = _03_thinkstats._mean_var(ys)
    _, var_res = _03_thinkstats._mean_var(res)
    unexplained = var_res / var_ys
    return 1 - unexplained
コード例 #2
0
ファイル: correlation.py プロジェクト: jengowong/think-stats
def _corr(xs, ys):
    """
    Computes the correlation of two sequences, Corr(X, Y).

    The covariance of xs and ys (obtained from _cov) is divided by the
    square root of the product of the two variances.

    Args:
        xs: sequence of values
        ys: sequence of values

    Returns:
        Corr(X, Y)
    """
    mean_x, var_x = _03_thinkstats._mean_var(xs)
    mean_y, var_y = _03_thinkstats._mean_var(ys)

    covariance = _cov(xs, ys, mean_x, mean_y)
    scale = math.sqrt(var_x * var_y)
    return covariance / scale
コード例 #3
0
ファイル: correlation.py プロジェクト: jengowong/think-stats
def _least_squares(xs, ys):
    """
    Fits a straight line to ys as a function of xs by linear least squares.

    The slope is the covariance of xs and ys divided by the variance of xs;
    the intercept is chosen so the line passes through the point of means.

    Args:
        xs: sequence of values
        ys: sequence of values

    Returns:
        tuple of (intercept, slope)
    """
    mean_x, var_x = _03_thinkstats._mean_var(xs)
    # the variance of ys plays no part in the fit
    mean_y, _ = _03_thinkstats._mean_var(ys)

    gradient = _cov(xs, ys, mean_x, mean_y) / var_x
    return mean_y - gradient * mean_x, gradient
コード例 #4
0
def _test(actual1, actual2, model, iters=1000):
    """
    Estimates a p-value based on the difference in means of two groups.

    The absolute observed difference is compared against a normal
    distribution with mean 0 and variance (1/n + 1/m) * Var(model), and
    both tails are summed.  Intermediate results are printed along the way,
    including the mean and variance of `iters` resampled differences drawn
    from the model for comparison.

    Args:
        actual1:
        actual2: sequences of observed values for groups 1 and 2
        model:   sequence of values from the hypothetical distribution
        iters:   number of resampled differences to generate for the
                 final diagnostic printout

    Returns:
        the two-tailed p-value of the observed difference
    """
    size1 = len(actual1)
    size2 = len(actual2)

    # observed difference in means, taken as a magnitude
    mean1, mean2, observed = hypothesis._difference_in_mean(actual1, actual2)
    observed = abs(observed)

    summary = (
        ('n:', size1),
        ('m:', size2),
        ('mu1', mean1),
        ('mu2', mean2),
        ('delta', observed),
    )
    for caption, value in summary:
        print(caption, value)

    # expected distribution of differences in sample mean under the model
    pooled_mean, pooled_var = _03_thinkstats._mean_var(model)
    print('(Mean, Var) of pooled data', pooled_mean, pooled_var)

    scale = 1.0 / size1 + 1.0 / size2
    expected_mean = 0
    expected_var = scale * pooled_var
    print('Expected Mean, Var of deltas', expected_mean, expected_var)

    # probability mass in both tails beyond the observed difference
    spread = math.sqrt(expected_var)
    lower_tail = _16_erf._normal_cdf(-observed, expected_mean, spread)
    upper_tail = 1 - _16_erf._normal_cdf(observed, expected_mean, spread)
    pvalue = lower_tail + upper_tail
    print('Tails:', lower_tail, upper_tail)
    print('p-value:', pvalue)

    # compare against the mean and variance of resampled differences
    resampled = []
    for _ in range(iters):
        resampled.append(hypothesis._resample(model, model, size1, size2))
    print('(Mean, Var) of resampled deltas', _03_thinkstats._mean_var(resampled))

    return pvalue
コード例 #5
0
ファイル: bayes_height.py プロジェクト: jengowong/think-stats
def _make_uniform_prior(t, num_points, label, spread=3.0):
    """
    Makes a prior distribution for mu and sigma based on a sample.

    The candidate values for mu and sigma are evenly spaced grids centred
    on the sample estimates and extending `spread` standard errors to
    either side.  Every (mu, sigma) pair is given the same value 1 in the
    Pmf; no normalization is performed here.

    As a side effect, prints the sample mean, the sample standard deviation
    and their ratio.

    Args:
        t:          sample
        num_points: number of values in each dimension
        label:      string label for the new Pmf
        spread:     number of standard errors to include

    Returns:
        tuple of (ms, ss, pmf) where ms and ss are the numpy arrays of
        candidate values for mu and sigma, and pmf is the Pmf that maps
        from (mu, sigma) to prob.
    """
    # estimate mean and stddev of t
    n = len(t)
    xbar, S2 = _03_thinkstats._mean_var(t)
    sighat = math.sqrt(S2)

    print(xbar, sighat, sighat / xbar)

    # compute standard error for mu and the range of ms
    stderr_xbar = sighat / math.sqrt(n)
    mspread = spread * stderr_xbar
    ms = numpy.linspace(xbar - mspread, xbar + mspread, num_points)

    # compute standard error for sigma and the range of ss
    stderr_sighat = sighat / math.sqrt(2 * (n - 1))
    sspread = spread * stderr_sighat
    ss = numpy.linspace(sighat - sspread, sighat + sspread, num_points)

    # populate the PMF: every grid point gets the same weight
    pmf = _04_Pmf.Pmf(name=label)
    for m in ms:
        for s in ss:
            pmf._set((m, s), 1)
    return ms, ss, pmf
コード例 #6
0
ファイル: social.py プロジェクト: jengowong/think-stats
def _summarize(srcs):
    """
    Computes the number of edges for each source.

    Also prints the mean of those counts and the square root of the
    variance reported by _mean_var.

    Args:
        srcs: mapping whose values are sized collections
              (presumably source -> edges; verify against caller)

    Returns:
        list holding the length of each value in srcs
    """
    # values() works on both Python 2 and Python 3 mappings, whereas
    # itervalues() was removed from dict in Python 3
    lens = [len(t) for t in srcs.values()]
    mu, sigma2 = _03_thinkstats._mean_var(lens)
    print(mu, math.sqrt(sigma2))
    return lens