def get_interaction(a, b, surv, int_direction='both'):
    '''
    Score the interaction between two event vectors against survival.

    Three Cox models are fit through get_cox_ph:
        1) a + b        (the two vectors as separate covariates)
        2) a:b          (indicator of combine(a, b) == int_direction)
        3) a + b + a:b  (the full output of combine(a, b))

    The value returned is the improvement of fit from 2 to 1 minus the
    improvement of fit from 3 to 2 (both measured with extract_chi2), so
    that as much of the information in the interaction term as possible
    is captured.  The statistic is described by the original author as
    chi2 distributed.
    '''
    # Work on renamed copies: the formula below refers to the covariates
    # as 'a' and 'b', and the caller's objects keep their own names.
    a = a.copy()
    b = b.copy()
    a.name = 'a'
    b.name = 'b'

    additive_model = get_cox_ph(surv, covariates=[a, b],
                                formula='Surv(days, event) ~ a + b')

    # Numeric (0./1.) indicator of the requested combination.
    matches_direction = combine(a, b) == int_direction
    int_var = 1. * matches_direction
    int_var.name = 'interaction'
    interaction_model = get_cox_ph(surv, int_var)

    full_model = get_cox_ph(surv, combine(a, b))

    gain_2_vs_1 = extract_chi2(interaction_model, additive_model)
    gain_3_vs_2 = extract_chi2(full_model, interaction_model)
    return gain_2_vs_1 - gain_3_vs_2
def get_interaction(a, b, surv, int_direction='both'):
    '''
    Interaction test statistic (chi2 distributed, per the original author)
    for two event vectors.

    Models compared, all fit with get_cox_ph:
        1) a + b
        2) a:b
        3) a + b + a:b

    Result: extract_chi2(model 2, model 1) - extract_chi2(model 3, model 2),
    i.e. the improvement of fit from 2 to 1 minus the improvement of fit
    from 3 to 2, which aims to capture as much of the information in the
    interaction term as possible.
    '''
    # Copies are renamed so the formula can address them as 'a' and 'b'.
    a, b = a.copy(), b.copy()
    a.name = 'a'
    b.name = 'b'

    # Model 1: both vectors as independent covariates.
    m_add = get_cox_ph(
        surv,
        covariates=[a, b],
        formula='Surv(days, event) ~ a + b',
    )

    # Model 2: single 0./1. covariate flagging the chosen combination.
    int_var = 1. * (combine(a, b) == int_direction)
    int_var.name = 'interaction'
    m_int = get_cox_ph(surv, int_var)

    # Model 3: the complete combined variable.
    m_full = get_cox_ph(surv, combine(a, b))

    return extract_chi2(m_int, m_add) - extract_chi2(m_full, m_int)
def get_interactions(df, cov_df, surv, test):
    '''
    Screen pairs of binary features for interactions with survival.

    Steps:
        1. Keep the rows of df whose 'unique' count (from describe) is 2.
        2. Within a data-type (labels sharing the same first element),
           drop the later feature of any pair whose Fisher odds ratio
           exceeds 2**4.
        3. For every remaining pair (a, b) with a < b from different
           data-types that is associated at p < .05 / n_tests with a
           finite odds ratio, run interaction_empirical_p.
        4. For pairs with interaction p < .1, run the supplied test on
           the corresponding combined indicator vector.

    Parameters:
        df:     feature-by-sample DataFrame (one feature per row).
        cov_df: forwarded unchanged as the third argument of test.
        surv:   survival data handed to interaction_empirical_p and test.
        test:   callable invoked as test(vec, surv, cov_df).

    Returns:
        (s, int_associations): s has one row per screened pair holding
        the interaction_empirical_p output; int_associations has one row
        per pair with p < .1 holding the output of test.  Both are empty
        DataFrames when no pair passes the screen.

    NOTE(review): `.ix`, `Index.diff` and `DataFrame.sort` are legacy pandas
    APIs; they are kept as-is to match the pandas version this file targets.
    '''
    binary = df[df.T.describe().ix['unique'] == 2]

    # Drop redundant features within a data-type.  Rows are materialised
    # once instead of re-running iterrows() for every outer row.
    # Labels are indexed with [0] to get the data-type -- presumably the
    # first level of a MultiIndex tuple; TODO confirm against callers.
    rows = list(binary.iterrows())
    redundant = set()
    for i, (a, v1) in enumerate(rows):
        for b, v2 in rows[i + 1:]:
            if a[0] != b[0]:
                continue
            if np.log2(fisher_exact_test(v1, v2)['odds_ratio']) > 4:
                redundant.add(b)
    binary = binary.ix[binary.index.diff(redundant)]

    n_tests = (len(binary) * (len(binary) - 1)) / 2

    rows = list(binary.iterrows())
    pair_stats = {}
    for a, v1 in rows:
        for b, v2 in rows:
            # Cheap label checks first so the Fisher test only runs on
            # pairs that can actually be kept.
            if not (a < b) or a[0] == b[0]:
                continue
            # One Fisher test per pair, reused for both criteria.
            assoc = fisher_exact_test(v1, v2)
            # Bonferroni-style threshold; evaluated here (not hoisted)
            # because n_tests is 0 when fewer than two features remain.
            if (assoc.ix['p'] < (.05 / n_tests)
                    and assoc.ix['odds_ratio'] != np.inf):
                pair_stats[(a, b)] = interaction_empirical_p(
                    v1, v2, surv, num_perm=101)
    s = pd.DataFrame(pair_stats).T

    int_associations = {}
    # With no screened pairs s has no 'p' column, so skip the follow-up
    # instead of failing on s.p.
    if pair_stats:
        int_pairs = s.ix[s.p < .1].sort('p')
        for p, vals in int_pairs.iterrows():
            combo = combine(binary.ix[p[0]], binary.ix[p[1]])
            vec = combo == vals['interaction']
            int_associations[p] = test(vec, surv, cov_df)
    int_associations = pd.DataFrame(int_associations).T
    return s, int_associations
def get_interactions(df, cov_df, surv, test):
    '''
    Find pairs of binary features that interact with respect to survival.

    Only rows of df whose describe() 'unique' count equals 2 are used.
    Features that are redundant within a data-type (same first label
    element, log2 Fisher odds ratio > 4) are removed, keeping the earlier
    one.  Remaining cross-data-type pairs that co-occur (Fisher p below
    .05 / n_tests, finite odds ratio) are scored with
    interaction_empirical_p; pairs scoring p < .1 are then passed to test.

    Parameters:
        df:     DataFrame with one feature per row.
        cov_df: passed through to test as its third argument.
        surv:   survival data used by interaction_empirical_p and test.
        test:   callable used as test(vec, surv, cov_df).

    Returns:
        Tuple (s, int_associations) of DataFrames indexed by feature pair:
        the interaction_empirical_p results and the test results for the
        pairs with p < .1.  Both are empty if no pair survives screening.

    NOTE(review): the legacy pandas calls (`.ix`, `Index.diff`,
    `DataFrame.sort`) are intentionally unchanged to stay compatible with
    the pandas version the rest of the file is written for.
    '''
    binary = df[df.T.describe().ix['unique'] == 2]

    # drop redundant features within a data-type
    # (label[0] is taken to be the data-type -- presumably the first level
    # of a MultiIndex; verify against callers)
    features = list(binary.iterrows())
    to_drop = set()
    for pos, (first, first_vals) in enumerate(features):
        for second, second_vals in features[pos + 1:]:
            same_type = first[0] == second[0]
            if same_type and np.log2(
                    fisher_exact_test(first_vals, second_vals)['odds_ratio']
            ) > 4:
                to_drop.add(second)
    binary = binary.ix[binary.index.diff(to_drop)]

    n_tests = (len(binary) * (len(binary) - 1)) / 2

    # Score candidate pairs.  The label checks are done before the Fisher
    # test, and the Fisher result is computed once and reused for both the
    # p-value and the odds-ratio criterion.
    features = list(binary.iterrows())
    scored = {}
    for first, first_vals in features:
        for second, second_vals in features:
            if first < second and first[0] != second[0]:
                fisher = fisher_exact_test(first_vals, second_vals)
                # .05 / n_tests stays inside the loop: n_tests is 0 when
                # fewer than two features are left, and then no pair
                # reaches this line.
                if fisher.ix['p'] >= (.05 / n_tests):
                    continue
                if fisher.ix['odds_ratio'] == np.inf:
                    continue
                scored[(first, second)] = interaction_empirical_p(
                    first_vals, second_vals, surv, num_perm=101)
    s = pd.DataFrame(scored).T

    if not scored:
        # Nothing passed the screen: s has no 'p' column to filter on.
        return s, pd.DataFrame({}).T

    int_pairs = s.ix[s.p < .1].sort('p')
    int_associations = {}
    for p, vals in int_pairs.iterrows():
        combo = combine(binary.ix[p[0]], binary.ix[p[1]])
        vec = combo == vals['interaction']
        int_associations[p] = test(vec, surv, cov_df)
    int_associations = pd.DataFrame(int_associations).T
    return s, int_associations
def _interaction(a, b, surv):
    '''
    Compare an additive Cox model (a + b) with a single-interaction model.

    The interaction direction is 'both' when the Fisher odds ratio of a
    and b is greater than 1 and 'neither' otherwise; the interaction
    covariate is the 0./1. indicator of combine(a, b) equalling that
    direction.

    Returns a pd.Series with:
        'interaction': the direction used ('both' or 'neither')
        'p':           LR_test(interaction model, additive model)
                       (presumably a likelihood-ratio p-value -- confirm
                       against LR_test)
    '''
    # Renamed copies so the formula can address the covariates by name.
    a = a.copy()
    b = b.copy()
    a.name = 'a'
    b.name = 'b'

    additive_model = get_cox_ph(surv, covariates=[a, b],
                                formula='Surv(days, event) ~ a + b')

    odds_ratio = fisher_exact_test(a, b)['odds_ratio']
    int_direction = 'both' if odds_ratio > 1 else 'neither'

    int_var = 1. * (combine(a, b) == int_direction)
    int_var.name = 'interaction'
    interaction_model = get_cox_ph(surv, int_var)

    p_value = LR_test(interaction_model, additive_model)
    return pd.Series({'interaction': int_direction, 'p': p_value})
def get_interaction_simple(a, b, surv, int_direction='both'):
    '''
    Test statistic (chi2 distributed, per the original author) of the
    interaction between two event vectors.

    Fits an additive Cox model (a + b) and a model whose only covariate is
    the 0./1. indicator of combine(a, b) == int_direction, then returns
    extract_chi2(interaction model, additive model).
    '''
    # Renamed copies so the formula can refer to 'a' and 'b'.
    a = a.copy()
    b = b.copy()
    a.name = 'a'
    b.name = 'b'

    additive_model = get_cox_ph(surv, covariates=[a, b],
                                formula='Surv(days, event) ~ a + b')

    matches_direction = combine(a, b) == int_direction
    int_var = 1. * matches_direction
    int_var.name = 'interaction'
    interaction_model = get_cox_ph(surv, int_var)

    return extract_chi2(interaction_model, additive_model)
def _interaction(a, b, surv):
    '''
    Evaluate one interaction term between a and b against survival.

    The direction of the interaction follows the sign of the association:
    'both' if fisher_exact_test reports an odds ratio above 1, otherwise
    'neither'.  A Cox model on the resulting indicator is compared with
    the additive a + b model via LR_test.

    Returns:
        pd.Series with keys 'interaction' (the chosen direction) and
        'p' (the LR_test result for the two models).
    '''
    a, b = a.copy(), b.copy()
    a.name = 'a'
    b.name = 'b'

    # Baseline: both vectors as separate covariates.
    m_add = get_cox_ph(
        surv,
        covariates=[a, b],
        formula='Surv(days, event) ~ a + b',
    )

    positively_associated = fisher_exact_test(a, b)['odds_ratio'] > 1
    if positively_associated:
        int_direction = 'both'
    else:
        int_direction = 'neither'

    # 0./1. indicator of the chosen combination.
    in_direction = combine(a, b) == int_direction
    int_var = 1. * in_direction
    int_var.name = 'interaction'
    m_int = get_cox_ph(surv, int_var)

    result = {'interaction': int_direction, 'p': LR_test(m_int, m_add)}
    return pd.Series(result)
def get_interaction_simple(a, b, surv, int_direction='both'):
    '''
    Get the interaction test statistic for two event vectors (described
    by the original author as chi2 distributed).

    Two Cox models are fit with get_cox_ph -- the additive model a + b and
    a model on the indicator of combine(a, b) == int_direction -- and the
    result of extract_chi2 on (indicator model, additive model) is
    returned.
    '''
    a, b = a.copy(), b.copy()
    a.name = 'a'
    b.name = 'b'

    # Additive baseline.
    m_add = get_cox_ph(
        surv,
        covariates=[a, b],
        formula='Surv(days, event) ~ a + b',
    )

    # Single covariate: 1. where the pair falls in int_direction, else 0.
    int_var = 1. * (combine(a, b) == int_direction)
    int_var.name = 'interaction'
    m_int = get_cox_ph(surv, int_var)

    chi2 = extract_chi2(m_int, m_add)
    return chi2