コード例 #1
0
ファイル: Survival.py プロジェクト: anyone1985/TCGA_Working
def get_interactions(df, cov_df, surv, test):
    '''
    Screen pairs of binary features for interactions and test the hits.

    Steps, in order:
      1. Keep the rows of ``df`` for which ``describe()`` reports exactly
         two unique values.
      2. Within rows sharing the same first label element, drop the later
         row of any pair whose log2 Fisher odds ratio exceeds 4.
      3. For every remaining pair from different groups whose Fisher exact
         p-value is below ``.05 / n_tests`` and whose odds ratio is finite,
         compute ``interaction_empirical_p``.
      4. For pairs with empirical p < .1, run ``test`` on the indicator of
         the reported interaction state.

    Parameters
    ----------
    df : DataFrame with one feature per row.  Row labels are indexed with
        ``[0]`` to obtain the group (presumably ``(data_type, feature)``
        tuples -- TODO confirm against callers).
    cov_df : passed through unchanged as the third argument of ``test``.
    surv : survival data, passed to ``interaction_empirical_p`` and ``test``.
    test : callable invoked as ``test(vec, surv, cov_df)``.

    Returns
    -------
    (s, int_associations) : ``s`` has one row per screened pair holding the
        output of ``interaction_empirical_p``; ``int_associations`` has one
        row per pair with p < .1 holding the output of ``test``.  Both are
        empty frames when no pair survives screening.
    '''
    # NOTE(review): .ix, Index.diff and DataFrame.sort are legacy pandas
    # APIs; they are kept here to stay consistent with the rest of the file.
    binary = df[df.T.describe().ix['unique'] == 2]

    # Drop redundant features within a data-type.
    redundant = {b for i, (a, v1) in enumerate(binary.iterrows())
                 for j, (b, v2) in enumerate(binary.iterrows())
                 if (i < j)
                 and a[0] == b[0]
                 and np.log2(fisher_exact_test(v1, v2)['odds_ratio']) > 4}
    binary = binary.ix[binary.index.diff(redundant)]

    n_tests = (len(binary) * (len(binary) - 1)) / 2
    screened = {}
    for a, v1 in binary.iterrows():
        for b, v2 in binary.iterrows():
            # Cheap label checks first, so the Fisher test only runs on
            # ordered pairs drawn from two different groups.
            if not (a < b) or a[0] == b[0]:
                continue
            # Run the Fisher test once and reuse both of its fields.
            assoc = fisher_exact_test(v1, v2)
            if assoc['p'] < (.05 / n_tests) and assoc['odds_ratio'] != np.inf:
                screened[(a, b)] = interaction_empirical_p(v1, v2, surv,
                                                           num_perm=101)
    s = pd.DataFrame(screened).T
    if not screened:
        # Nothing passed screening: the frame has no 'p' column to filter
        # on, so return empty results instead of failing on ``s.p``.
        return s, pd.DataFrame({}).T
    int_pairs = s.ix[s.p < .1].sort('p')

    int_associations = {}
    for p, vals in int_pairs.iterrows():
        combo = combine(binary.ix[p[0]], binary.ix[p[1]])
        # Boolean vector marking samples in the reported interaction state.
        vec = combo == vals['interaction']
        int_associations[p] = test(vec, surv, cov_df)
    int_associations = pd.DataFrame(int_associations).T
    return s, int_associations
コード例 #2
0
def get_interactions(df, cov_df, surv, test):
    '''
    Find interacting pairs of binary features and test each hit.

    Rows of ``df`` with exactly two unique values are kept, redundant rows
    within the same group (first label element) are removed, the remaining
    cross-group pairs are screened with ``fisher_exact_test`` and scored
    with ``interaction_empirical_p``, and pairs with p < .1 are passed to
    ``test(vec, surv, cov_df)``.

    Returns the frame of all scored pairs and the frame of ``test`` results.
    '''
    is_binary = df.T.describe().ix['unique'] == 2
    binary = df[is_binary]

    # Drop redundant features within a data-type.
    redundant = set()
    for i, (a, v1) in enumerate(binary.iterrows()):
        for j, (b, v2) in enumerate(binary.iterrows()):
            if i >= j or a[0] != b[0]:
                continue
            if np.log2(fisher_exact_test(v1, v2)['odds_ratio']) > 4:
                redundant.add(b)
    binary = binary.ix[binary.index.diff(redundant)]

    n_tests = (len(binary) * (len(binary) - 1)) / 2

    # Score every ordered cross-group pair that passes the Fisher screen.
    scored = {}
    for a, v1 in binary.iterrows():
        for b, v2 in binary.iterrows():
            if not (a < b):
                continue
            if not (fisher_exact_test(v1, v2).ix['p'] < (.05 / n_tests)):
                continue
            if not (fisher_exact_test(v1, v2).ix['odds_ratio'] != np.inf):
                continue
            if not (a[0] != b[0]):
                continue
            scored[(a, b)] = interaction_empirical_p(v1, v2, surv,
                                                     num_perm=101)
    s = pd.DataFrame(scored).T
    int_pairs = s.ix[s.p < .1].sort('p')

    # Run the caller-supplied test on the indicator of each selected pair.
    associations = {}
    for pair, row in int_pairs.iterrows():
        left = binary.ix[pair[0]]
        right = binary.ix[pair[1]]
        combo = combine(left, right)
        vec = combo == row['interaction']
        associations[pair] = test(vec, surv, cov_df)
    int_associations = pd.DataFrame(associations).T
    return s, int_associations
コード例 #3
0
ファイル: Survival.py プロジェクト: anyone1985/TCGA_Working
def interaction_empirical_p_resample(a, b, surv, num_perm=101, check_first=True):
    '''
    Calculate an empirical p-value for an interaction by sampling
    with replacement.

    The observed statistic ``r`` is ``get_interaction(a, b, surv)``.  When
    ``check_first`` is truthy and ``r`` is negative, the function returns
    p = 1 without resampling.  Otherwise ``num_perm`` index vectors are
    drawn with replacement from ``a.index``, the statistic is recomputed
    for each, and the p-value is the fraction of resampled statistics that
    are not ``<= r``.

    Parameters
    ----------
    a, b : feature vectors, aligned with each other via ``match_series``.
    surv : stacked survival Series; ``surv.unstack()`` must yield the two
        columns that are relabelled here as 'days' and 'event'.
    num_perm : number of resamples to draw.
    check_first : skip resampling (p = 1) when the observed statistic is
        negative.

    Returns
    -------
    pandas.Series with 'p' (the empirical p-value) and 'interaction'
    ('both' if the Fisher odds ratio of a and b is > 1, else 'neither').
    '''
    a, b = match_series(a, b)
    if fisher_exact_test(a, b)['odds_ratio'] > 1:
        int_direction = 'both'
    else:
        int_direction = 'neither'
    r = get_interaction(a, b, surv)
    # Plain truthiness instead of ``is True`` so that flags such as 1 or
    # numpy.bool_ are honoured as well.
    if check_first and (r < 0):
        return pd.Series({'p': 1, 'interaction': int_direction})

    mat = np.random.choice(a.index, size=(num_perm, len(a.index)))

    # The survival table is taken in its original order (it is indexed by
    # a.index, not by the resampled idx), so it is the same on every
    # iteration; extract the values once instead of once per resample.
    surv_values = surv.unstack().ix[a.index].as_matrix()
    positions = range(len(a.index))

    vec = {}
    for i, idx in enumerate(mat):
        # Re-index the resampled features 0..n-1 so that they line up
        # positionally with the survival rows.
        a_p = pd.Series(list(a.ix[idx]), positions)
        b_p = pd.Series(list(b.ix[idx]), positions)
        surv_p = pd.DataFrame(surv_values,
                              index=positions,
                              columns=['days', 'event']).stack()
        vec[i] = get_interaction(a_p, b_p, surv_p, int_direction)
    vec = pd.Series(vec)
    # NOTE(review): NaN statistics compare False against r and therefore
    # count towards the p-value here; interaction_empirical_p drops them
    # first -- confirm which behaviour is intended.

    empirical_p = 1.*(len(vec) - sum(vec <= r)) / len(vec)
    return pd.Series({'p': empirical_p, 'interaction': int_direction})
コード例 #4
0
def interaction_empirical_p(a, b, surv, num_perm=101):
    '''
    Calculate an empirical p-value for an interaction by permutation.

    The observed statistic ``r`` is ``get_interaction(a, b, surv)``.  The
    index of ``a`` is shuffled ``num_perm`` times (without replacement),
    the statistic is recomputed on each shuffle, NaN results are dropped,
    and the p-value is the fraction of the remaining statistics that are
    strictly greater than ``r``.

    Parameters
    ----------
    a, b : feature vectors, aligned with each other via ``match_series``.
    surv : stacked survival Series; ``surv.unstack()`` must yield the two
        columns that are relabelled here as 'days' and 'event'.
    num_perm : number of permutations to draw.

    Returns
    -------
    pandas.Series with 'p' (the empirical p-value, NaN when no permutation
    produced a usable statistic) and 'interaction' ('both' if the Fisher
    odds ratio of a and b is > 1, else 'neither').
    '''
    a, b = match_series(a, b)
    if fisher_exact_test(a, b)['odds_ratio'] > 1:
        int_direction = 'both'
    else:
        int_direction = 'neither'
    r = get_interaction(a, b, surv)
    mat = np.array([np.random.permutation(a.index) for i in range(num_perm)])

    # The survival table keeps its original order (indexed by a.index, not
    # by the permuted idx), so it is identical on every iteration; extract
    # the values once instead of once per permutation.
    surv_values = surv.unstack().ix[a.index].as_matrix()
    positions = range(len(a.index))

    vec = {}
    for i, idx in enumerate(mat):
        # Re-index the permuted features 0..n-1 so that they line up
        # positionally with the survival rows.
        a_p = pd.Series(list(a.ix[idx]), positions)
        b_p = pd.Series(list(b.ix[idx]), positions)
        surv_p = pd.DataFrame(surv_values,
                              index=positions,
                              columns=['days', 'event']).stack()
        vec[i] = get_interaction(a_p, b_p, surv_p, int_direction)
    vec = pd.Series(vec).dropna()
    if len(vec) == 0:
        # No usable permutation statistic (num_perm == 0 or every result
        # was NaN): report an undefined p-value instead of dividing by zero.
        return pd.Series({'p': np.nan, 'interaction': int_direction})
    empirical_p = 1. * (len(vec) - sum(vec <= r)) / len(vec)
    return pd.Series({'p': empirical_p, 'interaction': int_direction})
コード例 #5
0
ファイル: Survival.py プロジェクト: anyone1985/TCGA_Working
def _interaction(a, b, surv):
    '''
    Compare a two-covariate Cox model against a single interaction term.

    Works on copies of ``a`` and ``b`` renamed to 'a' and 'b' so they match
    the model formula.  The interaction state is 'both' when the Fisher
    odds ratio of the two vectors exceeds 1 and 'neither' otherwise; the
    interaction covariate is the 0/1 indicator of ``combine(a, b)`` being
    in that state.

    Returns a Series with 'interaction' (the state) and 'p' (the value of
    ``LR_test`` applied to the interaction model and the two-covariate
    model).
    '''
    a = a.copy()
    b = b.copy()
    a.name = 'a'
    b.name = 'b'
    additive_model = get_cox_ph(surv, covariates=[a, b],
                                formula='Surv(days, event) ~ a + b')

    odds_ratio = fisher_exact_test(a, b)['odds_ratio']
    int_direction = 'both' if odds_ratio > 1 else 'neither'

    # Float indicator of samples in the chosen interaction state.
    int_var = 1. * (combine(a, b) == int_direction)
    int_var.name = 'interaction'
    interaction_model = get_cox_ph(surv, int_var)

    return pd.Series({'interaction': int_direction,
                      'p': LR_test(interaction_model, additive_model)})
コード例 #6
0
def _interaction(a, b, surv):
    '''
    Score the interaction between two features with respect to survival.

    Fits ``get_cox_ph`` once with both features as covariates and once with
    only an indicator of their combined state, then hands both fits to
    ``LR_test``.  The combined state tested is 'both' if
    ``fisher_exact_test`` reports an odds ratio above 1, else 'neither'.

    Returns a Series holding the state under 'interaction' and the
    ``LR_test`` result under 'p'.
    '''
    # Work on renamed copies so the caller's Series keep their own names.
    first, second = a.copy(), b.copy()
    first.name, second.name = 'a', 'b'
    base_fit = get_cox_ph(surv,
                          covariates=[first, second],
                          formula='Surv(days, event) ~ a + b')

    direction = 'neither'
    if fisher_exact_test(first, second)['odds_ratio'] > 1:
        direction = 'both'

    in_state = combine(first, second) == direction
    indicator = 1. * in_state
    indicator.name = 'interaction'
    indicator_fit = get_cox_ph(surv, indicator)

    result = {'interaction': direction,
              'p': LR_test(indicator_fit, base_fit)}
    return pd.Series(result)