def kci_invariance_test(
        suffstat: Dict,
        context,
        i: int,
        cond_set: Optional[Union[List[int], int]] = None,
        width: float = 0,
        alpha: float = 0.05,
        unbiased: bool = False,
        regress: bool = True,
        gamma_approx: bool = True,
        n_draws: int = 500,
        lam: float = 1e-3,
        thresh: float = 1e-5,
        num_eig: int = 0,
):
    """
    Run the KCI test on a matrix combining observational samples with the samples of one context.

    The observational samples and the samples stored under `context` are merged by
    `combined_mat`; `kci_test` is then called on columns 0 and 1 of the merged matrix,
    conditioning on columns 2, 3, ... (one per entry of `cond_set`).

    Parameters
    ----------
    suffstat:
        dictionary holding the observational samples under the key 'obs_samples' and the
        samples of each context under the context's own key.
    context:
        key of `suffstat` selecting the context samples to compare against.
    i:
        position of the variable being tested, passed to `combined_mat`.
    cond_set:
        positions of the conditioning variables; normalized with `to_list`.
    width, alpha, unbiased, regress, gamma_approx, n_draws, lam, thresh, num_eig:
        forwarded unchanged to `kci_test`.

    Returns
    -------
    The value returned by `kci_test`.
    """
    conditioning = to_list(cond_set)
    stacked = combined_mat(suffstat['obs_samples'], suffstat[context], i, conditioning)

    # Columns 0 and 1 of the combined matrix are the pair under test; the conditioning
    # variables occupy the columns that follow, starting at column 2.
    conditioning_columns = [2 + offset for offset in range(len(conditioning))]

    kernel_options = dict(
        width=width,
        alpha=alpha,
        unbiased=unbiased,
        gamma_approx=gamma_approx,
        regress=regress,
        n_draws=n_draws,
        lam=lam,
        thresh=thresh,
        num_eig=num_eig,
    )
    return kci_test(stacked, 0, 1, conditioning_columns, **kernel_options)
def hsic_test(
        suffstat: np.ndarray,
        i: int,
        j: int,
        cond_set: Union[List[int], int] = None,
        alpha: float = 0.05
) -> Dict:
    """
    Test for (conditional) independence using the Hilbert-Schmidt Independence Criterion.

    Without a conditioning set, columns `i` and `j` are tested directly. With a
    conditioning set, `residuals` is called first and the two residual vectors it
    returns are tested instead.

    Parameters
    ----------
    suffstat:
        Matrix of samples.
    i:
        column position of first variable.
    j:
        column position of second variable.
    cond_set:
        column positions of conditioning set; normalized with `to_list`.
    alpha:
        Significance level of the test.

    Returns
    -------
    The result of `hsic_test_vector` for the two vectors that were tested.
    """
    conditioning = to_list(cond_set)

    if len(conditioning) > 0:
        # Conditional case: compare what is left of i and j after regressing on cond_set.
        first, second = residuals(suffstat, i, j, conditioning)
    else:
        # Marginal case: compare the raw columns.
        first, second = suffstat[:, i], suffstat[:, j]

    return hsic_test_vector(first, second, alpha=alpha)
def hsic_test(
        suffstat: Any,
        i: int,
        j: int,
        cond_set: Union[List[int], int] = None,
        alpha: float = 0.05
):
    """
    Test columns `i` and `j` of `suffstat` for (conditional) independence with HSIC.

    Parameters
    ----------
    suffstat:
        sample container indexed as `suffstat[:, column]`.
    i:
        column position of first variable.
    j:
        column position of second variable.
    cond_set:
        column positions of conditioning set; normalized with `to_list`.
    alpha:
        significance level forwarded to `hsic_test_vector`.

    Returns
    -------
    The result of `hsic_test_vector`.
    """
    # NOTE(review): `hsic_test` appears to be defined more than once in this file; if both
    # definitions live in the same module, the later one is the one callers get -- confirm.
    given = to_list(cond_set)

    if len(given) != 0:
        # Conditioning requested: test the residual vectors produced by `residuals`.
        resid_i, resid_j = residuals(suffstat, i, j, given)
        return hsic_test_vector(resid_i, resid_j, alpha=alpha)

    # No conditioning: test the two columns as they are.
    column_i = suffstat[:, i]
    column_j = suffstat[:, j]
    return hsic_test_vector(column_i, column_j, alpha=alpha)
def hsic_invariance_test(
        suffstat,
        context,
        i: int,
        cond_set: Optional[Union[List[int], int]] = None,
        alpha: float = 0.05
):
    """
    Run the HSIC test on a matrix combining observational samples with the samples of one context.

    The observational samples and the samples stored under `context` are merged by
    `combined_mat`; `hsic_test` is then called on columns 0 and 1 of the merged matrix,
    conditioning on columns 2, 3, ... (one per entry of `cond_set`).

    Parameters
    ----------
    suffstat:
        dictionary holding the observational samples under the key 'obs_samples' and the
        samples of each context under the context's own key.
    context:
        key of `suffstat` selecting the context samples to compare against.
    i:
        position of the variable being tested, passed to `combined_mat`.
    cond_set:
        positions of the conditioning variables; normalized with `to_list`.
    alpha:
        significance level forwarded to `hsic_test`.

    Returns
    -------
    The value returned by `hsic_test`.
    """
    conditioning = to_list(cond_set)
    stacked = combined_mat(suffstat['obs_samples'], suffstat[context], i, conditioning)

    # Columns 0 and 1 are the pair under test; conditioning variables start at column 2.
    conditioning_columns = [2 + offset for offset in range(len(conditioning))]
    return hsic_test(stacked, 0, 1, conditioning_columns, alpha=alpha)
def gauss_invariance_test(
        suffstat,
        context,
        i: int,
        cond_set: Optional[Union[List[int], int]] = None,
        alpha: float = 0.05,
        new=True,
        zero_mean=False,
        zero_coeffs=False
):
    """
    Test the null hypothesis that two Gaussian distributions are equal.

    Variable `i` is regressed on the conditioning variables separately in the
    observational samples and in the samples of `context`. Invariance is rejected if
    either the residual variances differ (F-test) or, when there is at least one
    regressor, the regression coefficients differ.

    Parameters
    ----------
    suffstat:
        dictionary containing:
            'obs' -- dictionary with 'samples' (sample matrix) and 'G' (Gram matrix)
            'contexts' -- mapping from context to a dictionary with the same two keys
        NOTE(review): index -1 is used as an extra regressor unless `zero_mean` is set, so
        the last column of each sample matrix is presumably a constant column of ones, and
        'G' is presumably `samples.T @ samples` -- confirm against the code building suffstat.
    context:
        which context to test.
    i:
        position of marginal distribution.
    cond_set:
        positions of conditioning set in the sample/Gram matrices; normalized with `to_list`.
    alpha:
        Significance level.
    new:
        accepted for compatibility; not used by this function.
    zero_mean:
        if True, index -1 is not added as a regressor.
    zero_coeffs:
        accepted for compatibility; not used by this function.

    Return
    ------
    dictionary containing ftest_stat, f_pvalue and reject, plus rc_stat and rc_pvalue
    whenever at least one regressor was used.
    """
    cond_set = to_list(cond_set)
    obs_stats = suffstat['obs']
    context_stats = suffstat['contexts'][context]
    obs_samples = obs_stats['samples']
    iv_samples = context_stats['samples']
    n1 = obs_samples.shape[0]
    n2 = iv_samples.shape[0]

    # === FIND REGRESSION COEFFICIENTS AND RESIDUALS
    if len(cond_set) != 0:
        cond_ix = cond_set if zero_mean else [*cond_set, -1]
        gram1 = obs_stats['G'][np.ix_(cond_ix, cond_ix)]
        gram2 = context_stats['G'][np.ix_(cond_ix, cond_ix)]
        # The inverses are needed again for the coefficient statistic; compute them once.
        gram1_inv = np.linalg.inv(gram1)
        gram2_inv = np.linalg.inv(gram2)
        design1 = obs_samples[:, cond_ix]
        design2 = iv_samples[:, cond_ix]
        coefs1 = gram1_inv @ design1.T @ obs_samples[:, i]
        coefs2 = gram2_inv @ design2.T @ iv_samples[:, i]
        residuals1 = obs_samples[:, i] - design1 @ coefs1
        residuals2 = iv_samples[:, i] - design2 @ coefs2
    elif not zero_mean:
        # Only regressor is index -1: the 1x1 Gram matrix is the sample count and the
        # coefficient is the sample mean.
        cond_ix = [-1]
        gram1_inv = np.linalg.inv(n1 * np.ones([1, 1]))
        gram2_inv = np.linalg.inv(n2 * np.ones([1, 1]))
        coefs1 = np.array([np.mean(obs_samples[:, i])])
        coefs2 = np.array([np.mean(iv_samples[:, i])])
        residuals1 = obs_samples[:, i] - coefs1
        residuals2 = iv_samples[:, i] - coefs2
    else:
        # No regressors at all. cond_ix must still be defined (as empty): it is read
        # below for the degrees of freedom and to skip the coefficient test.
        cond_ix = []
        residuals1 = obs_samples[:, i]
        residuals2 = iv_samples[:, i]

    num_regressors = len(cond_ix)
    has_regressors = num_regressors != 0

    # variances of residuals, correcting for the number of fitted coefficients
    var1 = np.var(residuals1, ddof=num_regressors)
    var2 = np.var(residuals2, ddof=num_regressors)

    # calculate regression coefficient invariance statistic
    if has_regressors:
        coef_diff = coefs1 - coefs2
        diff_cov = var1 * gram1_inv + var2 * gram2_inv
        rc_stat = coef_diff @ np.linalg.inv(diff_cov) @ coef_diff.T / num_regressors
        rc_pvalue = ncfdtr(num_regressors, n1 + n2 - num_regressors, 0, rc_stat)
        rc_pvalue = 2 * min(rc_pvalue, 1 - rc_pvalue)

    # calculate statistic for F-Test
    # NOTE(review): the variances use ddof=len(cond_ix) while the F distribution below uses
    # n - 1 degrees of freedom; confirm this is intended when more than one regressor is used.
    ftest_stat = var1 / var2
    f_pvalue = ncfdtr(n1 - 1, n2 - 1, 0, ftest_stat)
    f_pvalue = 2 * min(f_pvalue, 1 - f_pvalue)

    # === ACCEPT/REJECT INVARIANCE HYPOTHESIS BASED ON P-VALUES WITH BONFERRONI CORRECTION
    if has_regressors:
        reject = f_pvalue < alpha / 2 or rc_pvalue < alpha / 2
    else:
        reject = f_pvalue < alpha

    # === FORM RESULT DICT AND RETURN
    result_dict = dict(ftest_stat=ftest_stat, f_pvalue=f_pvalue, reject=reject)
    if has_regressors:
        result_dict['rc_stat'] = rc_stat
        result_dict['rc_pvalue'] = rc_pvalue

    return result_dict