def BrownForsytheTest(x, y):
    """
    Brown-Forsythe test for equality of spread between two samples.

    Every observation is replaced by its absolute deviation from the median
    of its own sample, and a one-way ANOVA F statistic is computed on the
    transformed values.

    :param x:
        First sample, a 1-D array-like of numbers.

    :param y:
        Second sample, a 1-D array-like of numbers. It does not need to
        have the same length as `x`.

    :returns:
        Tuple ``(W, p)``: the test statistic and its upper-tail probability
        under an F distribution with ``(K - 1, N - K)`` degrees of freedom,
        where ``K = 2`` is the number of groups and
        ``N = len(x) + len(y)``.
    """
    # Data analysis
    # The groups are kept as separate arrays: stacking them into a single
    # 2-D array only works when both samples have the same length.
    groups = [np.asarray(x, dtype=float), np.asarray(y, dtype=float)]
    K = len(groups)
    Ni = np.array([g.size for g in groups])
    N = Ni.sum()

    # Transform data: absolute deviation from the group median
    Zi = [np.abs(g - np.median(g)) for g in groups]
    Z_Bar = np.concatenate(Zi).mean()
    Zj = np.array([z.mean() for z in Zi])

    # Compute test result
    # Nominator: between-group sum of squares of the transformed data.
    # Denominator: within-group sum of squares of the transformed data.
    Nominator = np.sum(Ni * (Zj - Z_Bar) ** 2)
    Denominator = sum(np.sum((z - z_mean) ** 2) for z, z_mean in zip(Zi, Zj))
    W = (N - K) / float(K - 1) * Nominator / Denominator

    # Compute p value
    # sf() is the upper tail; it keeps precision for very small p values
    # where 1 - cdf() would round to zero.
    p = f.sf(W, K - 1, N - K)

    return W, p
def gelman_rubin(chains, return_cdf=False):
    """
    Compute the Gelman-Rubin R-statistic from an ensemble of chains.  `chains`
    is expected to have shape `(nsteps, nchains)` if samples are one
    dimensional, or `(nsteps, nchains, ndim)` if multidimensional.  For
    multidimensional samples R-statistics will be computed for each
    dimension.

    :param chains:
        An `(nsteps, nchains)` or `(nsteps, nchains, ndim)`-shaped array
        (any array-like accepted by :func:`numpy.asarray`).

    :param return_cdf: (optional)
        If ``True``, the CDF of the R-statistic(s), assuming an
        F-distribution, are returned in addition to the R-statistic(s).

    :returns:
        For 2-D input, ``R`` or ``(R, cdf)``.  For multidimensional input,
        a list with one ``R`` per dimension, or, when `return_cdf` is
        ``True``, a two-element list ``[Rs, cdfs]`` of per-dimension
        tuples.
    """
    chains = np.asarray(chains)

    if chains.ndim > 2:
        # One independent statistic per slice along the last axis.
        results = [
            gelman_rubin(chains[..., param], return_cdf=return_cdf)
            for param in range(chains.shape[-1])
        ]
        if return_cdf:
            # Materialise the transposed pairs: a bare zip() is a one-shot
            # iterator on Python 3 (no len(), no indexing, no re-use).
            return list(zip(*results))
        return results

    nsteps, nchains = chains.shape[0], chains.shape[1]
    chain_means = np.mean(chains, axis=0)

    # between-chain variance (spread of the per-chain means)
    interchain_var = np.sum(
        (chain_means - np.mean(chains)) ** 2) / (nchains - 1)

    # within-chain variances: per-sample contributions, averaged over chains
    intrachain_vars = (chains - chain_means) ** 2 / (nsteps - 1)
    intrachain_var = np.sum(intrachain_vars) / nchains

    var = intrachain_var * (nsteps - 1) / nsteps + interchain_var
    post_var = var + interchain_var / nchains

    # The Statistic
    R = np.sqrt(post_var / intrachain_var)

    if not return_cdf:
        return R

    # R should be F-distributed
    dof1 = nchains - 1
    dof2 = 2 * intrachain_var ** 2 * nchains / np.var(intrachain_vars)
    return R, f.cdf(R, dof1, dof2)
def gelman_rubin(chains, return_cdf=False):
    """
    Compute the Gelman-Rubin R-statistic from an ensemble of chains.  `chains`
    is expected to have shape `(nsteps, nchains)` if samples are one
    dimensional, or `(nsteps, nchains, ndim)` if multidimensional.  For
    multidimensional samples R-statistics will be computed for each
    dimension.

    :param chains:
        An `(nsteps, nchains)` or `(nsteps, nchains, ndim)`-shaped array
        (any array-like accepted by :func:`numpy.asarray`).

    :param return_cdf: (optional)
        If ``True``, the CDF of the R-statistic(s), assuming an
        F-distribution, are returned in addition to the R-statistic(s).

    :returns:
        For 2-D input, ``R`` or ``(R, cdf)``.  For multidimensional input,
        a list with one ``R`` per dimension, or, when `return_cdf` is
        ``True``, a two-element list ``[Rs, cdfs]`` of per-dimension
        tuples.
    """
    chains = np.asarray(chains)

    if chains.ndim > 2:
        # Recurse over the last axis; each slice is handled on its own.
        results = [gelman_rubin(chains[..., param], return_cdf=return_cdf)
                   for param in range(chains.shape[-1])]
        if return_cdf:
            # list(...) keeps the result indexable and re-iterable; on
            # Python 3 a bare zip() would be exhausted after one pass.
            return list(zip(*results))
        return results

    nsteps, nchains = chains.shape[0], chains.shape[1]
    chain_means = np.mean(chains, axis=0)

    # between-chain variance (spread of the per-chain means)
    interchain_var = np.sum(
        (chain_means - np.mean(chains)) ** 2) / (nchains - 1)

    # within-chain variances: per-sample contributions, averaged over chains
    intrachain_vars = (chains - chain_means) ** 2 / (nsteps - 1)
    intrachain_var = np.sum(intrachain_vars) / nchains

    var = intrachain_var * (nsteps - 1) / nsteps + interchain_var
    post_var = var + interchain_var / nchains

    # The Statistic
    R = np.sqrt(post_var / intrachain_var)

    if not return_cdf:
        return R

    # R should be F-distributed
    dof1 = nchains - 1
    dof2 = 2 * intrachain_var ** 2 * nchains / np.var(intrachain_vars)
    return R, f.cdf(R, dof1, dof2)
def ANOVA(tab):
    """
    One-way ANOVA test for table tab (tab should be a list of lists, one
    inner list of observations per group).

    Values returned are, in order:
        Fs: Value of the F statistic (between-group mean square divided by
            within-group mean square).
        glentre: degrees of freedom between groups (``r - 1``).
        gldentro: degrees of freedom within groups (``N - r``).
        1-fsnede: ``1 - fsnede.cdf(Fs, glentre, gldentro)``, i.e. the upper
            (right) tail probability, used as the p-value of the test.
    """
    r = len(tab)
    ni = [len(ele) for ele in tab]
    # NOTE(review): stats(ele)[0] is used below as the group centre;
    # presumably it is the sample mean -- confirm against `stats`.
    xbi = [stats(ele)[0] for ele in tab]
    N = sum(ni)

    # Size-weighted average of the group centres (grand mean).
    # zip() replaces the Python-2-only xrange() index loop.
    XB = sum([n * xb for n, xb in zip(ni, xbi)]) / N

    # Within-group sum of squares and mean square.
    ssi = [sum([(ele - xb) ** 2 for ele in grupo])
           for grupo, xb in zip(tab, xbi)]
    SSdentro = sum(ssi)
    gldentro = N - r
    MSdentro = SSdentro / gldentro

    # Between-group sum of squares and mean square.
    SSentre = sum([n * (xb - XB) ** 2 for n, xb in zip(ni, xbi)])
    glentre = r - 1
    MSentre = SSentre / glentre

    Fs = MSentre / MSdentro
    # NOTE(review): fsnede is presumably an F (Snedecor) distribution
    # object exposing .cdf(x, dfn, dfd), e.g. scipy.stats.f -- confirm.
    return [Fs, glentre, gldentro, 1. - fsnede.cdf(Fs, glentre, gldentro)]
def predict_functional(result, focus_var, summaries=None, values=None,
                       summaries2=None, values2=None, alpha=0.05,
                       ci_method="pointwise", linear=True, num_points=10,
                       exog=None, exog2=None, **kwargs):
    """
    Predictions and confidence band for a fitted model along `focus_var`.

    :param result:
        Fitted results object.  This function uses ``result.model``,
        ``result.predict``, ``result.t_test`` and, when `linear` is False,
        ``result.family.link``.
    :param focus_var:
        Name of the variable of interest; used to index `exog` and passed
        on to ``_make_exog``.
    :param summaries, values:
        Specification of the remaining covariates, forwarded to
        ``_check_args`` / ``_make_exog``.  Must not be given together with
        `exog`.
    :param summaries2, values2:
        Second specification; when either is given, a contrast (first
        minus second) is computed.
    :param alpha:
        ``1 - alpha`` is the coverage level of the band.
    :param ci_method:
        One of ``"pointwise"``, ``"scheffe"`` or ``"simultaneous"``.
    :param linear:
        If False, predictions and band are mapped through
        ``result.family.link.inverse``.  Must be True for contrasts.
    :param num_points:
        Number of points requested from ``_make_exog``.
    :param exog:
        Explicit data at which to predict (indexable by `focus_var`).
    :param exog2:
        Explicit second data set; when given together with `exog`, the
        contrast ``exog`` minus ``exog2`` is computed.
    :param kwargs:
        Forwarded to ``result.predict``.

    :returns:
        ``(pred, cb, fvals)``: predictions (or contrasts), a two-column
        array-like of lower/upper band limits, and the `focus_var` values.

    :raises ValueError:
        For an unknown `ci_method`, for a contrast with ``linear=False``,
        or when `exog` is combined with summaries/values arguments.
    """
    if ci_method not in ("pointwise", "scheffe", "simultaneous"):
        raise ValueError('confidence band method must be one of '
                         '`pointwise`, `scheffe`, and `simultaneous`.')

    # A contrast is requested either through the second summaries/values
    # specification or through an explicit second data set.
    contrast = ((values2 is not None) or (summaries2 is not None) or
                (exog is not None and exog2 is not None))

    if contrast and not linear:
        raise ValueError("`linear` must be True for computing contrasts")

    model = result.model
    if exog is not None:

        if any(x is not None
               for x in [summaries, summaries2, values, values2]):
            raise ValueError("if `exog` is provided then do not "
                             "provide `summaries` or `values`")

        fexog = exog
        dexog = patsy.dmatrix(model.data.design_info,
                              fexog, return_type='dataframe')
        fvals = exog[focus_var]

        if exog2 is not None:
            # Build the second design from exog2 (not from exog again,
            # which would make every contrast identically zero).
            fexog2 = exog2
            dexog2 = patsy.dmatrix(model.data.design_info,
                                   fexog2, return_type='dataframe')

    else:

        values, summaries, values2, summaries2 = _check_args(
            values, summaries, values2, summaries2)

        dexog, fexog, fvals = _make_exog(result, focus_var, summaries,
                                         values, num_points)

        if len(summaries2) + len(values2) > 0:
            dexog2, fexog2, _fvals2 = _make_exog(result, focus_var,
                                                 summaries2, values2,
                                                 num_points)

    from statsmodels.genmod.generalized_linear_model import GLM
    from statsmodels.genmod.generalized_estimating_equations import GEE
    if isinstance(result.model, (GLM, GEE)):
        # Ask these models for the linear predictor; the inverse link is
        # applied at the end when `linear` is False.
        kwargs_pred = kwargs.copy()
        kwargs_pred.update({"linear": True})
    else:
        kwargs_pred = kwargs

    pred = result.predict(exog=fexog, **kwargs_pred)

    if contrast:
        pred2 = result.predict(exog=fexog2, **kwargs_pred)
        pred = pred - pred2
        dexog = dexog - dexog2

    if ci_method == 'pointwise':

        t_test = result.t_test(dexog)
        cb = t_test.conf_int(alpha=alpha)

    elif ci_method == 'scheffe':

        t_test = result.t_test(dexog)
        sd = t_test.sd
        # One row per design point; this equals num_points only when the
        # design came from _make_exog, not when `exog` was supplied.
        cb = np.zeros((dexog.shape[0], 2))

        # Scheffe's method: half-width is sd * sqrt(df1 * F quantile).
        from scipy.stats.distributions import f as fdist
        df1 = result.model.exog.shape[1]
        df2 = result.model.exog.shape[0] - df1
        # The critical value is the (1 - alpha) quantile, hence ppf();
        # cdf(1 - alpha) would be a probability, not a quantile.
        qf = fdist.ppf(1 - alpha, df1, df2)
        fx = sd * np.sqrt(df1 * qf)
        cb[:, 0] = pred - fx
        cb[:, 1] = pred + fx

    elif ci_method == 'simultaneous':

        sigma, c = _glm_basic_scr(result, dexog, alpha)
        cb = np.zeros((dexog.shape[0], 2))
        cb[:, 0] = pred - c * sigma
        cb[:, 1] = pred + c * sigma

    if not linear:
        # May need to support other models with link-like functions.
        link = result.family.link
        pred = link.inverse(pred)
        cb = link.inverse(cb)

    return pred, cb, fvals
def predict_functional(result, focus_var, summaries=None, values=None,
                       summaries2=None, values2=None, alpha=0.05,
                       ci_method="pointwise", linear=True, num_points=10,
                       exog=None, exog2=None, **kwargs):
    # docstring attached below
    #
    # Summary: predict along `focus_var` from the fitted `result` and
    # return (pred, cb, fvals), where `cb` holds the lower/upper limits
    # of a confidence band built with `ci_method`.  When a second
    # specification (summaries2/values2, or exog2 together with exog) is
    # given, the prediction is the contrast "first minus second".
    # Raises ValueError for an unknown `ci_method`, for a contrast with
    # linear=False, and when `exog` is mixed with summaries/values.

    if ci_method not in ("pointwise", "scheffe", "simultaneous"):
        raise ValueError('confidence band method must be one of `pointwise`, `scheffe`, and `simultaneous`.')

    # An explicit second data set also requests a contrast.
    contrast = ((values2 is not None) or (summaries2 is not None) or
                (exog is not None and exog2 is not None))

    if contrast and not linear:
        raise ValueError("`linear` must be True for computing contrasts")

    model = result.model
    if exog is not None:

        if any(x is not None
               for x in [summaries, summaries2, values, values2]):
            raise ValueError("if `exog` is provided then do not provide `summaries` or `values`")

        # NOTE(review): `design_info.builder` looks like the older patsy
        # spelling of `design_info` -- confirm the supported patsy range
        # before changing it.
        fexog = exog
        dexog = patsy.dmatrix(model.data.design_info.builder,
                              fexog, return_type='dataframe')
        fvals = exog[focus_var]

        if exog2 is not None:
            # The second design must come from exog2; reusing exog here
            # would make every contrast identically zero.
            fexog2 = exog2
            dexog2 = patsy.dmatrix(model.data.design_info.builder,
                                   fexog2, return_type='dataframe')

    else:

        values, summaries, values2, summaries2 = _check_args(
            values, summaries, values2, summaries2)

        dexog, fexog, fvals = _make_exog(result, focus_var, summaries,
                                         values, num_points)

        if len(summaries2) + len(values2) > 0:
            dexog2, fexog2, _fvals2 = _make_exog(result, focus_var,
                                                 summaries2, values2,
                                                 num_points)

    from statsmodels.genmod.generalized_linear_model import GLM
    from statsmodels.genmod.generalized_estimating_equations import GEE
    if isinstance(result.model, (GLM, GEE)):
        # Request the linear predictor here; the inverse link is applied
        # at the very end when `linear` is False.
        kwargs_pred = kwargs.copy()
        kwargs_pred.update({"linear": True})
    else:
        kwargs_pred = kwargs

    pred = result.predict(exog=fexog, **kwargs_pred)

    if contrast:
        pred2 = result.predict(exog=fexog2, **kwargs_pred)
        pred = pred - pred2
        dexog = dexog - dexog2

    if ci_method == 'pointwise':

        t_test = result.t_test(dexog)
        cb = t_test.conf_int(alpha=alpha)

    elif ci_method == 'scheffe':

        t_test = result.t_test(dexog)
        sd = t_test.sd
        # Size the band by the actual number of design rows; num_points
        # is not the row count when `exog` was supplied by the caller.
        cb = np.zeros((dexog.shape[0], 2))

        # Scheffe's method: half-width is sd * sqrt(df1 * F quantile).
        from scipy.stats.distributions import f as fdist
        df1 = result.model.exog.shape[1]
        df2 = result.model.exog.shape[0] - df1
        # ppf() gives the (1 - alpha) quantile of F(df1, df2);
        # cdf(1 - alpha) is a probability and yields a far too narrow band.
        qf = fdist.ppf(1 - alpha, df1, df2)
        fx = sd * np.sqrt(df1 * qf)
        cb[:, 0] = pred - fx
        cb[:, 1] = pred + fx

    elif ci_method == 'simultaneous':

        sigma, c = _glm_basic_scr(result, dexog, alpha)
        cb = np.zeros((dexog.shape[0], 2))
        cb[:, 0] = pred - c*sigma
        cb[:, 1] = pred + c*sigma

    if not linear:
        # May need to support other models with link-like functions.
        link = result.family.link
        pred = link.inverse(pred)
        cb = link.inverse(cb)

    return pred, cb, fvals