Beispiel #1
0
def get_interactions(df, cov_df, surv, test):
    '''
    Screen pairs of binary features for interactions and test the hits.

    Steps, as implemented below:
      1. Keep the rows of ``df`` whose ``describe()`` 'unique' count is 2.
      2. Within a data-type (element 0 of the row label), drop the later
         row of every pair whose Fisher odds ratio has log2 > 4.
      3. For each remaining pair of rows from different data-types that
         passes Fisher's test at 0.05 divided by the number of feature
         pairs, with a finite odds ratio, call
         ``interaction_empirical_p``.
      4. For pairs whose empirical p is below 0.1, build the indicator of
         the interacting combination and pass it to ``test``.

    Parameters
    ----------
    df : DataFrame with one feature per row; row labels must be
        indexable, with the data-type at position 0.
    cov_df : passed through unchanged as the third argument of ``test``.
    surv : passed through to ``interaction_empirical_p`` and ``test``.
    test : callable invoked as ``test(vec, surv, cov_df)``.

    Returns
    -------
    (s, int_associations) : ``s`` has one row per screened pair with the
        values returned by ``interaction_empirical_p``;
        ``int_associations`` has one row per pair with p < 0.1 holding
        whatever ``test`` returned.  Both are empty when no pair passes
        the screen.

    NOTE: this uses the legacy pandas API (``.ix``, ``DataFrame.sort``,
    ``Index.diff``), which has been removed from later pandas releases.
    '''
    binary = df[df.T.describe().ix['unique'] == 2]

    # Drop redundant features within a data-type.  Rows are materialised
    # once so each unordered pair is visited exactly one time.
    rows = list(binary.iterrows())
    redundant = set()
    for i, (a, v1) in enumerate(rows):
        for j in range(i + 1, len(rows)):
            b, v2 = rows[j]
            if (a[0] == b[0]
                    and np.log2(fisher_exact_test(v1, v2)['odds_ratio']) > 4):
                redundant.add(b)
    binary = binary.ix[binary.index.diff(redundant)]

    n_tests = (len(binary) * (len(binary) - 1)) / 2
    pair_stats = {}
    for a, v1 in binary.iterrows():
        for b, v2 in binary.iterrows():
            # Cheap label checks first, so Fisher's test runs once, and
            # only on ordered pairs that span two data-types.
            if (a < b) and a[0] != b[0]:
                assoc = fisher_exact_test(v1, v2)
                if (assoc.ix['p'] < (.05 / n_tests)
                        and assoc.ix['odds_ratio'] != np.inf):
                    pair_stats[(a, b)] = interaction_empirical_p(
                        v1, v2, surv, num_perm=101)
    s = pd.DataFrame(pair_stats).T

    if not pair_stats:
        # Nothing passed the screen: ``s`` has no 'p' column to filter
        # on, so return empty results instead of failing on ``s.p``.
        return s, pd.DataFrame()

    int_pairs = s.ix[s.p < .1].sort('p')

    int_associations = {}
    for p, vals in int_pairs.iterrows():
        combo = combine(binary.ix[p[0]], binary.ix[p[1]])
        vec = combo == vals['interaction']
        int_associations[p] = test(vec, surv, cov_df)
    int_associations = pd.DataFrame(int_associations).T
    return s, int_associations
Beispiel #2
0
def get_interactions(df, cov_df, surv, test):
    '''
    Find pairs of binary features that interact, then test each hit.

    Rows of ``df`` are features.  Only rows whose ``describe()`` 'unique'
    count equals 2 are used.  Row labels must be indexable; element 0 is
    treated as the feature's data-type.

    Parameters
    ----------
    df : DataFrame with one feature per row.
    cov_df : forwarded untouched to ``test``.
    surv : forwarded to ``interaction_empirical_p`` and to ``test``.
    test : callable, called as ``test(vec, surv, cov_df)`` for every pair
        whose empirical p-value is below 0.1.

    Returns
    -------
    (s, int_associations) : ``s`` holds the ``interaction_empirical_p``
        result for every pair that passed the Fisher screen (one row per
        pair); ``int_associations`` holds the ``test`` result for every
        pair with p < 0.1.  Both are empty if no pair passes the screen.

    NOTE: this relies on the legacy pandas API (``.ix``,
    ``DataFrame.sort``, ``Index.diff``), which later pandas releases
    removed.
    '''
    from itertools import combinations

    binary = df[df.T.describe().ix['unique'] == 2]

    # Drop redundant features within a data-type: for each unordered pair
    # sharing a data-type, discard the later row when log2(odds ratio) > 4.
    redundant = {
        b
        for (a, v1), (b, v2) in combinations(binary.iterrows(), 2)
        if a[0] == b[0]
        and np.log2(fisher_exact_test(v1, v2)['odds_ratio']) > 4
    }
    binary = binary.ix[binary.index.diff(redundant)]

    n_tests = (len(binary) * (len(binary) - 1)) / 2

    # Lazily run Fisher's test once per ordered cross data-type pair; the
    # cheap label comparisons run first so same-type pairs never reach it.
    screened = (
        (a, b, v1, v2, fisher_exact_test(v1, v2))
        for a, v1 in binary.iterrows() for b, v2 in binary.iterrows()
        if (a < b) and a[0] != b[0]
    )
    pair_stats = {
        (a, b): interaction_empirical_p(v1, v2, surv, num_perm=101)
        for a, b, v1, v2, assoc in screened
        if assoc.ix['p'] < (.05 / n_tests)
        and assoc.ix['odds_ratio'] != np.inf
    }
    s = pd.DataFrame(pair_stats).T

    if not pair_stats:
        # An empty frame has no 'p' column; return empty results rather
        # than raising on ``s.p`` below.
        return s, pd.DataFrame()

    int_pairs = s.ix[s.p < .1].sort('p')

    int_associations = {}
    for p, vals in int_pairs.iterrows():
        combo = combine(binary.ix[p[0]], binary.ix[p[1]])
        vec = combo == vals['interaction']
        int_associations[p] = test(vec, surv, cov_df)
    int_associations = pd.DataFrame(int_associations).T
    return s, int_associations
Beispiel #3
0
def interaction_empirical_p_resample(a, b, surv, num_perm=101, check_first=True):
    '''
    Calculate an empirical p-value for an interaction by sampling
    with replacement.

    The observed statistic ``r = get_interaction(a, b, surv)`` is compared
    with the statistics obtained from ``num_perm`` resamples of ``a`` and
    ``b``.  Only ``a`` and ``b`` are resampled; the survival table keeps
    its original row order and is merely re-indexed by position.

    Parameters
    ----------
    a, b : Series, aligned with each other through ``match_series``.
    surv : Series whose ``unstack()`` yields two columns per observation;
        they are relabelled 'days' and 'event' here.
    num_perm : number of resamples to draw.
    check_first : when exactly ``True`` and ``r`` is negative, skip the
        resampling and return p = 1.

    Returns
    -------
    Series with 'p' (fraction of resampled statistics not ``<= r``) and
    'interaction' ('both' when the Fisher odds ratio of ``a`` and ``b``
    exceeds 1, otherwise 'neither').

    NOTE(review): NaN statistics are not dropped before the comparison;
    they never satisfy ``vec <= r`` and therefore raise the p-value.
    Confirm this is intended.
    '''
    a, b = match_series(a, b)
    if fisher_exact_test(a, b)['odds_ratio'] > 1:
        int_direction = 'both'
    else:
        int_direction = 'neither'
    r = get_interaction(a, b, surv)
    if (r < 0) and (check_first is True):
        return pd.Series({'p': 1, 'interaction': int_direction})

    n_obs = len(a.index)
    positions = range(n_obs)
    mat = np.random.choice(a.index, size=(num_perm, n_obs))

    # The survival table does not depend on the resample, so build it once
    # instead of unstacking and restacking it on every iteration.
    surv_p = pd.DataFrame(surv.unstack().ix[a.index].as_matrix(),
                          index=positions,
                          columns=['days', 'event']).stack()

    vec = {}
    for i, idx in enumerate(mat):
        # Positional index so the resampled values line up with surv_p.
        a_p = pd.Series(list(a.ix[idx]), positions)
        b_p = pd.Series(list(b.ix[idx]), positions)
        vec[i] = get_interaction(a_p, b_p, surv_p, int_direction)
    vec = pd.Series(vec)

    empirical_p = 1.*(len(vec) - sum(vec <= r)) / len(vec)
    return pd.Series({'p': empirical_p, 'interaction': int_direction})
Beispiel #4
0
def interaction_empirical_p(a, b, surv, num_perm=101):
    '''
    Calculate an empirical p-value for an interaction by permutation.

    The observed statistic ``r = get_interaction(a, b, surv)`` is compared
    with the statistics obtained after jointly permuting ``a`` and ``b``
    ``num_perm`` times.  The survival table keeps its original row order
    and is only re-indexed by position, so each permutation breaks the
    link between the (a, b) pair and survival.

    Parameters
    ----------
    a, b : Series, aligned with each other through ``match_series``.
    surv : Series whose ``unstack()`` yields two columns per observation;
        they are relabelled 'days' and 'event' here.
    num_perm : number of permutations to draw.

    Returns
    -------
    Series with 'p' (fraction of non-NaN permuted statistics that are not
    ``<= r``; NaN when no permuted statistic is available) and
    'interaction' ('both' when the Fisher odds ratio of ``a`` and ``b``
    exceeds 1, otherwise 'neither').
    '''
    a, b = match_series(a, b)
    if fisher_exact_test(a, b)['odds_ratio'] > 1:
        int_direction = 'both'
    else:
        int_direction = 'neither'
    r = get_interaction(a, b, surv)
    mat = np.array([np.random.permutation(a.index) for _ in range(num_perm)])

    positions = range(len(a.index))
    # The survival table is the same for every permutation; build it once
    # rather than unstacking and restacking it inside the loop.
    surv_p = pd.DataFrame(surv.unstack().ix[a.index].as_matrix(),
                          index=positions,
                          columns=['days', 'event']).stack()

    vec = {}
    for i, idx in enumerate(mat):
        # Positional index so the permuted values line up with surv_p.
        a_p = pd.Series(list(a.ix[idx]), positions)
        b_p = pd.Series(list(b.ix[idx]), positions)
        vec[i] = get_interaction(a_p, b_p, surv_p, int_direction)
    vec = pd.Series(vec).dropna()

    if len(vec) == 0:
        # Every permuted statistic was NaN (or num_perm was 0): the
        # p-value is undefined, so report NaN instead of dividing by zero.
        empirical_p = np.nan
    else:
        empirical_p = 1. * (len(vec) - sum(vec <= r)) / len(vec)
    return pd.Series({'p': empirical_p, 'interaction': int_direction})
Beispiel #5
0
def _interaction(a, b, surv):
    '''
    Compare a model on ``a`` and ``b`` with a model on their interaction.

    Copies of ``a`` and ``b`` are renamed 'a' and 'b' and fitted with
    ``get_cox_ph`` using the formula 'Surv(days, event) ~ a + b'.  A second
    ``get_cox_ph`` fit uses a single 0/1 variable named 'interaction' that
    marks where ``combine(a, b)`` equals the interaction direction.  The
    direction is 'both' when the Fisher odds ratio of ``a`` and ``b``
    exceeds 1, otherwise 'neither'.

    Returns a Series with 'interaction' (the direction) and 'p' (the value
    of ``LR_test(interaction_model, additive_model)``).  The inputs are not
    modified.
    '''
    first, second = a.copy(), b.copy()
    first.name = 'a'
    second.name = 'b'
    additive_fit = get_cox_ph(surv,
                              covariates=[first, second],
                              formula='Surv(days, event) ~ a + b')

    # Default to 'neither'; switch to 'both' for a positive association.
    int_direction = 'neither'
    if fisher_exact_test(first, second)['odds_ratio'] > 1:
        int_direction = 'both'

    # Multiplying by 1. turns the boolean match into a float indicator.
    indicator = 1.*(combine(first, second) == int_direction)
    indicator.name = 'interaction'
    interaction_fit = get_cox_ph(surv, indicator)

    result = {'interaction': int_direction,
              'p': LR_test(interaction_fit, additive_fit)}
    return pd.Series(result)
Beispiel #6
0
def _interaction(a, b, surv):
    '''
    Score the interaction of two events against their additive model.

    Works on renamed copies of ``a`` and ``b`` (named 'a' and 'b'), so the
    caller's Series are left untouched.  Two ``get_cox_ph`` fits are made:
    one with both covariates under 'Surv(days, event) ~ a + b', and one
    with a single float indicator named 'interaction' that is 1 where
    ``combine(a, b)`` equals the chosen direction.

    The direction is 'both' if ``fisher_exact_test`` reports an odds ratio
    above 1 and 'neither' otherwise.

    Returns a Series with keys 'interaction' (the direction) and 'p' (the
    result of ``LR_test`` called with the indicator fit first and the
    additive fit second).
    '''
    covariates = []
    for series, label in ((a, 'a'), (b, 'b')):
        renamed = series.copy()
        renamed.name = label
        covariates.append(renamed)
    lhs, rhs = covariates

    base_model = get_cox_ph(surv,
                            covariates=covariates,
                            formula='Surv(days, event) ~ a + b')

    co_occurring = fisher_exact_test(lhs, rhs)['odds_ratio'] > 1
    int_direction = 'both' if co_occurring else 'neither'

    matches_direction = combine(lhs, rhs) == int_direction
    int_var = 1. * matches_direction
    int_var.name = 'interaction'
    int_model = get_cox_ph(surv, int_var)

    p_value = LR_test(int_model, base_model)
    return pd.Series({'interaction': int_direction, 'p': p_value})