def _coef_determination(ys, res):
    """
    Computes the coefficient of determination (R^2) from residuals.

    The result is one minus the ratio of the variance of the residuals
    to the variance of the dependent variable, both as reported by
    _03_thinkstats._mean_var.

    Args:
        ys: sequence of values of the dependent variable
        res: sequence of residuals

    Returns:
        float coefficient of determination
    """
    # Only the variances are needed; the means are discarded.
    _, total_var = _03_thinkstats._mean_var(ys)
    _, residual_var = _03_thinkstats._mean_var(res)
    unexplained = residual_var / total_var
    return 1 - unexplained
def _corr(xs, ys):
    """
    Computes the correlation Corr(X, Y) of two sequences.

    The covariance of xs and ys (from _cov) is divided by the square
    root of the product of their variances.

    Args:
        xs: sequence of values
        ys: sequence of values

    Returns:
        Corr(X, Y)
    """
    mean_x, var_x = _03_thinkstats._mean_var(xs)
    mean_y, var_y = _03_thinkstats._mean_var(ys)

    covariance = _cov(xs, ys, mean_x, mean_y)
    scale = math.sqrt(var_x * var_y)
    return covariance / scale
def _least_squares(xs, ys):
    """
    Fits a straight line to ys as a function of xs by least squares.

    The slope is Cov(X, Y) / Var(X); the intercept is chosen so the
    line passes through the point (mean of xs, mean of ys).

    Args:
        xs: sequence of values
        ys: sequence of values

    Returns:
        tuple of (intercept, slope)
    """
    mean_x, var_x = _03_thinkstats._mean_var(xs)
    # The variance of ys is not needed for the fit.
    mean_y, _ = _03_thinkstats._mean_var(ys)

    gradient = _cov(xs, ys, mean_x, mean_y) / var_x
    intercept = mean_y - gradient * mean_x
    return intercept, gradient
def _test(actual1, actual2, model, iters=1000):
    """
    Estimates a p-value for the observed difference in group means.

    The p-value is computed analytically from the variance of the model
    sample; intermediate quantities are printed along the way. A set of
    resampled differences is also generated and summarized on stdout
    for comparison, but it does not influence the returned value.

    Args:
        actual1: sequence of observed values for group 1
        actual2: sequence of observed values for group 2
        model: sequence of values from the hypothetical distribution
        iters: number of resampled differences to generate for the
            printed comparison

    Returns:
        float p-value: the sum of the two tail probabilities beyond
        +/- the absolute observed difference
    """
    size1 = len(actual1)
    size2 = len(actual2)

    # observed group means and the magnitude of their difference
    mean1, mean2, raw_diff = hypothesis._difference_in_mean(actual1, actual2)
    observed = abs(raw_diff)

    print('n:', size1)
    print('m:', size2)
    print('mu1', mean1)
    print('mu2', mean2)
    print('delta', observed)

    # expected distribution of the difference in sample means
    pooled_mean, pooled_var = _03_thinkstats._mean_var(model)
    print('(Mean, Var) of pooled data', pooled_mean, pooled_var)

    expected_mean = 0
    expected_var = (1.0 / size1 + 1.0 / size2) * pooled_var
    print('Expected Mean, Var of deltas', expected_mean, expected_var)

    # probability mass in both tails beyond the observed difference
    spread = math.sqrt(expected_var)
    lower_tail = _16_erf._normal_cdf(-observed, expected_mean, spread)
    upper_tail = 1 - _16_erf._normal_cdf(observed, expected_mean, spread)
    pvalue = lower_tail + upper_tail

    print('Tails:', lower_tail, upper_tail)
    print('p-value:', pvalue)

    # resample from the model to compare against the expected mean and
    # variance printed above
    resampled = []
    for _ in range(iters):
        resampled.append(hypothesis._resample(model, model, size1, size2))
    summary = _03_thinkstats._mean_var(resampled)
    print('(Mean, Var) of resampled deltas', summary)

    return pvalue
def _make_uniform_prior(t, num_points, label, spread=3.0):
    """
    Builds a uniform grid prior over (mu, sigma) from a sample.

    The grid is centered on the sample mean and standard deviation and
    extends `spread` standard errors in each direction. Every
    (mu, sigma) pair is given the same weight of 1; no normalization is
    performed here. The sample mean, standard deviation and their ratio
    are printed as a side effect.

    Args:
        t: sample (sequence of values)
        num_points: number of grid values in each dimension
        label: string label for the new Pmf
        spread: number of standard errors to include on each side

    Returns:
        tuple of (ms, ss, pmf) where ms and ss are the numpy arrays of
        candidate mu and sigma values and pmf maps each (mu, sigma)
        pair to 1
    """
    # point estimates of the mean and standard deviation
    sample_size = len(t)
    center_mu, variance = _03_thinkstats._mean_var(t)
    center_sigma = math.sqrt(variance)
    print(center_mu, center_sigma, center_sigma / center_mu)

    # candidate values of mu: +/- spread standard errors of the mean
    mu_stderr = center_sigma / math.sqrt(sample_size)
    mu_halfwidth = spread * mu_stderr
    ms = numpy.linspace(
        center_mu - mu_halfwidth, center_mu + mu_halfwidth, num_points)

    # candidate values of sigma: +/- spread standard errors of sigma
    sigma_stderr = center_sigma / math.sqrt(2 * (sample_size - 1))
    sigma_halfwidth = spread * sigma_stderr
    ss = numpy.linspace(
        center_sigma - sigma_halfwidth,
        center_sigma + sigma_halfwidth,
        num_points)

    # give every grid point the same weight
    pmf = _04_Pmf.Pmf(name=label)
    for mu in ms:
        for sigma in ss:
            pmf._set((mu, sigma), 1)

    return ms, ss, pmf
def _summarize(srcs):
    """
    Computes the number of edges for each source.

    Prints the mean and the standard deviation (square root of the
    variance reported by _03_thinkstats._mean_var) of those counts.

    Args:
        srcs: mapping whose values are sized collections (presumably
            the edges of each source -- confirm against the caller)

    Returns:
        list with the length of each value in srcs
    """
    # dict.itervalues() does not exist on Python 3; values() is
    # available on both Python 2 and Python 3 mappings.
    lens = [len(edges) for edges in srcs.values()]
    mu, sigma2 = _03_thinkstats._mean_var(lens)
    print(mu, math.sqrt(sigma2))
    return lens