def get_interactions(df, cov_df, surv, test):
    '''
    Screen pairs of binary features for interactions and test the hits.

    Rows of df with exactly two unique values are kept.  Row labels are
    indexed with [0] and compared, so they are expected to be tuples whose
    first element identifies the data-type of the feature.

    df: feature matrix, one feature per row.
    cov_df: covariates, passed through untouched to test.
    surv: survival data, passed to interaction_empirical_p and to test.
    test: callable invoked as test(vec, surv, cov_df) for every candidate
        pair.

    Returns (s, int_associations).  s has one row per screened pair holding
    the output of interaction_empirical_p; int_associations has one row per
    pair with p < .1 holding whatever test returned.  Both are empty frames
    when no pair survives the screen.
    '''
    # NOTE(review): .ix, Index.diff and DataFrame.sort are legacy pandas
    # APIs; confirm the pinned pandas version before upgrading pandas.
    binary = df[df.T.describe().ix['unique'] == 2]

    # Drop redundant features within a data-type: of two features that share
    # a data-type and have log2(odds ratio) > 4, the later one is discarded.
    # Rows are materialized once instead of re-running iterrows() per pair.
    rows = list(binary.iterrows())
    redundant = set()
    for i, (a, v1) in enumerate(rows):
        for b, v2 in rows[i + 1:]:
            if a[0] != b[0]:
                continue
            if np.log2(fisher_exact_test(v1, v2)['odds_ratio']) > 4:
                redundant.add(b)
    binary = binary.ix[binary.index.diff(redundant)]

    # Bonferroni-style threshold over all pairs of the retained features.
    n_tests = (len(binary) * (len(binary) - 1)) / 2
    rows = list(binary.iterrows())
    screened = {}
    for a, v1 in rows:
        for b, v2 in rows:
            # Cheap label checks first: each unordered pair once, and only
            # pairs spanning two different data-types.
            if not (a < b) or a[0] == b[0]:
                continue
            # One Fisher test per pair; the result feeds both conditions.
            assoc = fisher_exact_test(v1, v2)
            if assoc['p'] < (.05 / n_tests) and assoc['odds_ratio'] != np.inf:
                screened[(a, b)] = interaction_empirical_p(v1, v2, surv,
                                                           num_perm=101)
    s = pd.DataFrame(screened).T

    # Nothing passed the screen: the frame has no 'p' column to filter on.
    if len(s) == 0:
        return s, pd.DataFrame()

    int_pairs = s.ix[s['p'] < .1].sort('p')

    int_associations = {}
    for pair, vals in int_pairs.iterrows():
        combo = combine(binary.ix[pair[0]], binary.ix[pair[1]])
        vec = combo == vals['interaction']
        int_associations[pair] = test(vec, surv, cov_df)
    int_associations = pd.DataFrame(int_associations).T
    return s, int_associations
def get_interactions(df, cov_df, surv, test):
    '''
    Screen pairs of binary features for interactions and test the hits.

    Only rows of df with exactly two unique values are considered.  Row
    labels are indexed with [0] and compared, so they are expected to be
    tuples whose first element identifies the feature's data-type.

    df: feature matrix, one feature per row.
    cov_df: covariates, forwarded unchanged to test.
    surv: survival data, forwarded to interaction_empirical_p and to test.
    test: callable invoked as test(vec, surv, cov_df) for each candidate
        pair.

    Returns (s, int_associations).  s holds the interaction_empirical_p
    output for every screened pair (one row per pair); int_associations
    holds the result of test for every pair with p < .1.  Both are empty
    frames when no pair survives the screen.
    '''
    # NOTE(review): .ix, Index.diff and DataFrame.sort are legacy pandas
    # APIs; confirm the pinned pandas version before upgrading pandas.
    binary = df[df.T.describe().ix['unique'] == 2]

    # drop redundant features within a data-type
    # (the later feature of any same-type pair with log2 odds ratio > 4)
    features = list(binary.iterrows())
    to_drop = set()
    for i, (a, v1) in enumerate(features):
        for b, v2 in features[i + 1:]:
            same_type = a[0] == b[0]
            if same_type and np.log2(
                    fisher_exact_test(v1, v2)['odds_ratio']) > 4:
                to_drop.add(b)
    binary = binary.ix[binary.index.diff(to_drop)]

    # Multiple-testing threshold: .05 divided by the number of feature pairs.
    n_tests = (len(binary) * (len(binary) - 1)) / 2
    features = list(binary.iterrows())
    hits = {}
    for a, v1 in features:
        for b, v2 in features:
            # Label checks are free, so do them before any Fisher test.
            if (a < b) and a[0] != b[0]:
                # Evaluate the Fisher test once and reuse both statistics.
                fisher = fisher_exact_test(v1, v2)
                significant = fisher['p'] < (.05 / n_tests)
                if significant and fisher['odds_ratio'] != np.inf:
                    hits[(a, b)] = interaction_empirical_p(v1, v2, surv,
                                                           num_perm=101)
    s = pd.DataFrame(hits).T

    # An empty screen leaves a frame without a 'p' column; bail out early
    # instead of failing on the attribute lookup below.
    if len(s) == 0:
        return s, pd.DataFrame()

    int_pairs = s.ix[s['p'] < .1].sort('p')

    int_associations = {}
    for pair, vals in int_pairs.iterrows():
        combo = combine(binary.ix[pair[0]], binary.ix[pair[1]])
        vec = combo == vals['interaction']
        int_associations[pair] = test(vec, surv, cov_df)
    int_associations = pd.DataFrame(int_associations).T
    return s, int_associations
def interaction_empirical_p_resample(a, b, surv, num_perm=101,
                                     check_first=True):
    '''
    Calculate an empirical p-value for an interaction by sampling with
    replacement.

    The observed statistic r = get_interaction(a, b, surv) is computed
    first.  When check_first is True and r < 0 the function returns p = 1
    without resampling (presumably r < 0 means the interaction does not
    improve the model fit -- confirm against get_interaction).  Otherwise
    num_perm resamples of the (a, b) labels are drawn and the p-value is
    the fraction of resampled statistics that are not <= r.

    a, b: the two event vectors; aligned with match_series before use.
    surv: stacked survival data that unstacks to two columns, relabelled
        here as 'days' and 'event'.
    num_perm: number of resamples to draw.
    check_first: enable the early exit described above.

    Returns a Series with keys 'p' and 'interaction' ('both' when the
    Fisher odds ratio of a and b is > 1, otherwise 'neither').  p is NaN
    when no resample was drawn (num_perm == 0).
    '''
    a, b = match_series(a, b)
    if fisher_exact_test(a, b)['odds_ratio'] > 1:
        int_direction = 'both'
    else:
        int_direction = 'neither'
    r = get_interaction(a, b, surv)
    if (r < 0) and (check_first is True):
        return pd.Series({'p': 1, 'interaction': int_direction})

    mat = np.random.choice(a.index, size=(num_perm, len(a.index)))

    # The survival frame does not depend on the draw (it is indexed by
    # a.index, not by the resampled labels), so build it once.
    # NOTE(review): only a and b are resampled; survival keeps its original
    # order -- confirm this is the intended null model.
    positions = range(len(a.index))
    surv_p = pd.DataFrame(surv.unstack().ix[a.index].as_matrix(),
                          index=positions,
                          columns=['days', 'event']).stack()

    vec = {}
    for i, idx in enumerate(mat):
        # Re-key the resampled values 0..n-1 so they line up with surv_p.
        a_p = pd.Series(list(a.ix[idx]), positions)
        b_p = pd.Series(list(b.ix[idx]), positions)
        vec[i] = get_interaction(a_p, b_p, surv_p, int_direction)
    vec = pd.Series(vec)

    # NOTE(review): NaN statistics are kept and count as "not <= r", unlike
    # interaction_empirical_p which drops them -- confirm which is intended.
    if len(vec) == 0:
        # No resamples: the empirical p-value is undefined.
        return pd.Series({'p': np.nan, 'interaction': int_direction})
    empirical_p = 1. * (len(vec) - sum(vec <= r)) / len(vec)
    return pd.Series({'p': empirical_p, 'interaction': int_direction})
def interaction_empirical_p(a, b, surv, num_perm=101):
    '''
    Calculate an empirical p-value for an interaction by permutation.

    The observed statistic r = get_interaction(a, b, surv) is compared with
    the statistics obtained after jointly shuffling the labels of a and b
    (sampling without replacement) while the survival data keeps its
    original order.  The p-value is the fraction of valid (non-NaN)
    permuted statistics that are not <= r.

    a, b: the two event vectors; aligned with match_series before use.
    surv: stacked survival data that unstacks to two columns, relabelled
        here as 'days' and 'event'.
    num_perm: number of permutations to draw.

    Returns a Series with keys 'p' and 'interaction' ('both' when the
    Fisher odds ratio of a and b is > 1, otherwise 'neither').  p is NaN
    when no permuted statistic is available (num_perm == 0 or every
    permuted statistic was NaN).
    '''
    a, b = match_series(a, b)
    if fisher_exact_test(a, b)['odds_ratio'] > 1:
        int_direction = 'both'
    else:
        int_direction = 'neither'
    r = get_interaction(a, b, surv)

    mat = np.array([np.random.permutation(a.index)
                    for _ in range(num_perm)])

    # The survival frame is identical for every permutation (it is indexed
    # by a.index, not by the shuffled labels), so build it once.
    positions = range(len(a.index))
    surv_p = pd.DataFrame(surv.unstack().ix[a.index].as_matrix(),
                          index=positions,
                          columns=['days', 'event']).stack()

    vec = {}
    for i, idx in enumerate(mat):
        # Re-key the shuffled values 0..n-1 so they line up with surv_p.
        a_p = pd.Series(list(a.ix[idx]), positions)
        b_p = pd.Series(list(b.ix[idx]), positions)
        vec[i] = get_interaction(a_p, b_p, surv_p, int_direction)
    vec = pd.Series(vec).dropna()

    if len(vec) == 0:
        # No usable permutations: the empirical p-value is undefined.
        return pd.Series({'p': np.nan, 'interaction': int_direction})
    empirical_p = 1. * (len(vec) - sum(vec <= r)) / len(vec)
    return pd.Series({'p': empirical_p, 'interaction': int_direction})
def _interaction(a, b, surv):
    '''
    Compare an interaction-only Cox model against the additive a + b model.

    Copies of a and b are renamed 'a' and 'b' and fitted with the formula
    'Surv(days, event) ~ a + b'.  A second model is fitted on a single
    indicator that is 1 where combine(a, b) equals the interaction
    direction ('both' when the Fisher odds ratio is > 1, else 'neither').

    Returns a Series with the direction under 'interaction' and the result
    of LR_test(interaction model, additive model) under 'p'.
    '''
    # Work on copies so the callers' Series keep their own names.
    first, second = a.copy(), b.copy()
    first.name, second.name = 'a', 'b'

    additive_fit = get_cox_ph(surv, covariates=[first, second],
                              formula='Surv(days, event) ~ a + b')

    co_occurring = fisher_exact_test(first, second)['odds_ratio'] > 1
    int_direction = 'both' if co_occurring else 'neither'

    # Multiplying by 1. turns the boolean mask into a float indicator.
    indicator = 1. * (combine(first, second) == int_direction)
    indicator.name = 'interaction'
    interaction_fit = get_cox_ph(surv, indicator)

    p_value = LR_test(interaction_fit, additive_fit)
    return pd.Series({'interaction': int_direction, 'p': p_value})
def _interaction(a, b, surv):
    '''
    Test an interaction indicator of a and b against the additive model.

    The baseline Cox model uses renamed copies of both inputs with the
    formula 'Surv(days, event) ~ a + b'.  The alternative model uses one
    covariate, named 'interaction', flagging where combine(a, b) equals
    the chosen direction: 'both' if the Fisher odds ratio exceeds 1,
    'neither' otherwise.

    Returns a Series with keys 'interaction' (the direction) and 'p' (the
    value of LR_test on the alternative and baseline models).
    '''
    # Rename copies, not the originals, to match the formula's term names.
    a = a.copy()
    b = b.copy()
    a.name = 'a'
    b.name = 'b'

    baseline = get_cox_ph(surv,
                          covariates=[a, b],
                          formula='Surv(days, event) ~ a + b')

    odds_ratio = fisher_exact_test(a, b)['odds_ratio']
    int_direction = 'neither'
    if odds_ratio > 1:
        int_direction = 'both'

    # Boolean match against the direction, cast to float via 1. *
    matches_direction = combine(a, b) == int_direction
    int_var = 1. * matches_direction
    int_var.name = 'interaction'

    alternative = get_cox_ph(surv, int_var)
    result = {'interaction': int_direction,
              'p': LR_test(alternative, baseline)}
    return pd.Series(result)