Esempio n. 1
0
def get_interaction(a, b, surv, int_direction='both'):
    '''
    Score the interaction between two event vectors against survival.

    Three Cox models are fit through get_cox_ph:
        1) additive: formula 'Surv(days, event) ~ a + b'
        2) interaction only: a 0./1. indicator marking where
           combine(a, b) equals int_direction
        3) combined: combine(a, b) passed as the single covariate
           (described by the original author as a + b + a:b)

    The value returned is extract_chi2(m2, m1) - extract_chi2(m3, m2).
    The original author's stated intent is to reward pairs whose
    signal is captured mostly by the interaction term.

    The inputs a and b are copied before being renamed, so the
    caller's objects keep their own names.
    '''
    first, second = a.copy(), b.copy()
    # The formula below refers to the covariates by these exact names.
    first.name = 'a'
    second.name = 'b'
    additive_fit = get_cox_ph(
        surv,
        covariates=[first, second],
        formula='Surv(days, event) ~ a + b',
    )

    matches_direction = combine(first, second) == int_direction
    indicator = 1. * matches_direction  # boolean match -> float
    indicator.name = 'interaction'
    interaction_fit = get_cox_ph(surv, indicator)

    combined_fit = get_cox_ph(surv, combine(first, second))

    return (extract_chi2(interaction_fit, additive_fit)
            - extract_chi2(combined_fit, interaction_fit))
Esempio n. 2
0
def get_interaction(a, b, surv, int_direction='both'):
    '''
    Compute an interaction score for two event vectors with respect
    to survival.

    Models fit via get_cox_ph:
        1) 'Surv(days, event) ~ a + b' on copies of a and b
        2) a single 0./1. covariate flagging where combine(a, b)
           equals int_direction
        3) combine(a, b) as the single covariate (the original notes
           call this a + b + a:b)

    Returns extract_chi2(model 2, model 1) minus
    extract_chi2(model 3, model 2); per the original notes, this is
    meant to favour information carried by the interaction term.
    '''
    vec_a = a.copy()
    vec_b = b.copy()
    # Names must match the variable names used in the formula string.
    vec_a.name, vec_b.name = 'a', 'b'
    model_additive = get_cox_ph(surv, covariates=[vec_a, vec_b],
                                formula='Surv(days, event) ~ a + b')

    flag = 1. * (combine(vec_a, vec_b) == int_direction)
    flag.name = 'interaction'
    model_interaction = get_cox_ph(surv, flag)

    model_combined = get_cox_ph(surv, combine(vec_a, vec_b))

    gain_interaction = extract_chi2(model_interaction, model_additive)
    gain_combined = extract_chi2(model_combined, model_interaction)
    return gain_interaction - gain_combined
Esempio n. 3
0
def get_interactions(df, cov_df, surv, test):
    '''
    Screen pairs of binary features for interactions with survival.

    Steps:
        1) Keep the rows of df that take exactly two unique values.
        2) Within a data-type (element 0 of the row label), drop the
           later feature of any pair whose Fisher odds ratio has
           log2 greater than 4.
        3) For every pair of labels with a < b coming from different
           data-types, whose Fisher p-value is below .05 / n_tests
           and whose odds ratio is finite, compute
           interaction_empirical_p(v1, v2, surv, num_perm=101).
        4) For pairs with p < .1, call test() on the boolean vector
           marking where combine() of the two features equals the
           reported 'interaction' value.

    Parameters:
        df: feature-by-sample frame; row labels are indexed with [0]
            to obtain the data-type.
        cov_df: covariates, passed through to test unchanged.
        surv: survival data, passed through to the helpers unchanged.
        test: callable invoked as test(vec, surv, cov_df).

    Returns:
        (s, int_associations): the frame of per-pair screening
        results and the frame of test() results, both indexed by
        (a, b) label pairs.

    NOTE: .ix, Index.diff and DataFrame.sort are legacy pandas APIs
    that newer pandas releases no longer provide; they are kept
    because the rest of this code targets that pandas generation.
    '''
    binary = df[df.T.describe().ix['unique'] == 2]

    # Drop redundant features within a data-type.
    redundant = set()
    for i, (a, v1) in enumerate(binary.iterrows()):
        for j, (b, v2) in enumerate(binary.iterrows()):
            if i >= j or a[0] != b[0]:
                continue
            if np.log2(fisher_exact_test(v1, v2)['odds_ratio']) > 4:
                redundant.add(b)
    binary = binary.ix[binary.index.diff(redundant)]

    n_tests = (len(binary) * (len(binary) - 1)) / 2
    pair_stats = {}
    for a, v1 in binary.iterrows():
        for b, v2 in binary.iterrows():
            # Cheap label checks first, so Fisher's test only runs on
            # ordered pairs that span two data-types.
            if not (a < b) or a[0] == b[0]:
                continue
            # Run the test once and reuse both statistics.
            fisher = fisher_exact_test(v1, v2)
            if (fisher['p'] < (.05 / n_tests)
                    and fisher['odds_ratio'] != np.inf):
                pair_stats[(a, b)] = interaction_empirical_p(
                    v1, v2, surv, num_perm=101)
    s = pd.DataFrame(pair_stats).T

    # With no candidate pairs the frame has no 'p' column, so skip
    # the filter instead of failing on the attribute lookup.
    if pair_stats:
        int_pairs = s.ix[s.p < .1].sort('p')
    else:
        int_pairs = s

    int_associations = {}
    for p, vals in int_pairs.iterrows():
        combo = combine(binary.ix[p[0]], binary.ix[p[1]])
        vec = combo == vals['interaction']
        int_associations[p] = test(vec, surv, cov_df)
    int_associations = pd.DataFrame(int_associations).T
    return s, int_associations
Esempio n. 4
0
def get_interactions(df, cov_df, surv, test):
    '''
    Screen pairs of binary features for interactions with survival.

    Steps:
        1) Keep the rows of df that take exactly two unique values.
        2) Within a data-type (element 0 of the row label), drop the
           later feature of any pair whose Fisher odds ratio has
           log2 greater than 4.
        3) For every pair of labels with a < b coming from different
           data-types, whose Fisher p-value is below .05 / n_tests
           and whose odds ratio is finite, compute
           interaction_empirical_p(v1, v2, surv, num_perm=101).
        4) For pairs with p < .1, call test() on the boolean vector
           marking where combine() of the two features equals the
           reported 'interaction' value.

    Parameters:
        df: feature-by-sample frame; row labels are indexed with [0]
            to obtain the data-type.
        cov_df: covariates, passed through to test unchanged.
        surv: survival data, passed through to the helpers unchanged.
        test: callable invoked as test(vec, surv, cov_df).

    Returns:
        (s, int_associations): the frame of per-pair screening
        results and the frame of test() results, both indexed by
        (a, b) label pairs.

    NOTE: .ix, Index.diff and DataFrame.sort are legacy pandas APIs
    that newer pandas releases no longer provide; they are kept
    because the rest of this code targets that pandas generation.
    '''
    binary = df[df.T.describe().ix['unique'] == 2]

    # Drop redundant features within a data-type.
    redundant = set()
    for i, (a, v1) in enumerate(binary.iterrows()):
        for j, (b, v2) in enumerate(binary.iterrows()):
            same_type_later = (i < j) and a[0] == b[0]
            if (same_type_later and
                    np.log2(fisher_exact_test(v1, v2)['odds_ratio']) > 4):
                redundant.add(b)
    binary = binary.ix[binary.index.diff(redundant)]

    n_tests = (len(binary) * (len(binary) - 1)) / 2
    pair_stats = {}
    for a, v1 in binary.iterrows():
        for b, v2 in binary.iterrows():
            # Cheap label checks first, so Fisher's test only runs on
            # ordered pairs that span two data-types.
            if not (a < b) or a[0] == b[0]:
                continue
            # Run the test once and reuse both statistics.
            fisher = fisher_exact_test(v1, v2)
            if fisher['p'] >= (.05 / n_tests):
                continue
            if fisher['odds_ratio'] == np.inf:
                continue
            pair_stats[(a, b)] = interaction_empirical_p(
                v1, v2, surv, num_perm=101)
    s = pd.DataFrame(pair_stats).T

    # With no candidate pairs the frame has no 'p' column, so skip
    # the filter instead of failing on the attribute lookup.
    int_pairs = s.ix[s.p < .1].sort('p') if pair_stats else s

    int_associations = {}
    for p, vals in int_pairs.iterrows():
        combo = combine(binary.ix[p[0]], binary.ix[p[1]])
        vec = combo == vals['interaction']
        int_associations[p] = test(vec, surv, cov_df)
    int_associations = pd.DataFrame(int_associations).T
    return s, int_associations
Esempio n. 5
0
def _interaction(a, b, surv):
    '''
    Test one pair of event vectors for an interaction with survival.

    The direction is 'both' when the Fisher odds ratio of a and b is
    greater than 1, otherwise 'neither'. A Cox model on the 0./1.
    indicator of that direction (from combine(a, b)) is compared to
    the additive model 'Surv(days, event) ~ a + b' using LR_test.

    Returns a pd.Series with entries 'interaction' (the direction
    string) and 'p' (the value returned by LR_test).
    '''
    first, second = a.copy(), b.copy()
    # The formula refers to the covariates by these exact names.
    first.name = 'a'
    second.name = 'b'
    additive_fit = get_cox_ph(surv, covariates=[first, second],
                              formula='Surv(days, event) ~ a + b')

    odds_ratio = fisher_exact_test(first, second)['odds_ratio']
    int_direction = 'both' if odds_ratio > 1 else 'neither'

    indicator = 1. * (combine(first, second) == int_direction)
    indicator.name = 'interaction'
    interaction_fit = get_cox_ph(surv, indicator)

    p_value = LR_test(interaction_fit, additive_fit)
    return pd.Series({'interaction': int_direction, 'p': p_value})
Esempio n. 6
0
def get_interaction_simple(a, b, surv, int_direction='both'):
    '''
    Compare an interaction-only Cox model with the additive model for
    two event vectors.

    The additive model uses 'Surv(days, event) ~ a + b' on copies of
    a and b; the interaction model uses a single 0./1. covariate
    flagging where combine(a, b) equals int_direction.

    Returns extract_chi2(interaction model, additive model).
    '''
    first = a.copy()
    second = b.copy()
    # The formula refers to the covariates by these exact names.
    first.name, second.name = 'a', 'b'
    additive_fit = get_cox_ph(
        surv,
        covariates=[first, second],
        formula='Surv(days, event) ~ a + b',
    )

    matches_direction = combine(first, second) == int_direction
    indicator = 1. * matches_direction
    indicator.name = 'interaction'
    interaction_fit = get_cox_ph(surv, indicator)
    return extract_chi2(interaction_fit, additive_fit)
Esempio n. 7
0
def _interaction(a, b, surv):
    '''
    Evaluate a single feature pair for an interaction with survival.

    Picks the direction 'both' if fisher_exact_test reports an odds
    ratio above 1 for the pair and 'neither' otherwise, fits a Cox
    model on the 0./1. indicator of combine(a, b) matching that
    direction, and compares it with the additive model
    'Surv(days, event) ~ a + b' through LR_test.

    Returns a pd.Series holding 'interaction' (direction string) and
    'p' (the LR_test result).
    '''
    vec_a = a.copy()
    vec_b = b.copy()
    # Names must match the variable names used in the formula string.
    vec_a.name, vec_b.name = 'a', 'b'
    model_additive = get_cox_ph(
        surv,
        covariates=[vec_a, vec_b],
        formula='Surv(days, event) ~ a + b',
    )

    positively_associated = fisher_exact_test(vec_a, vec_b)['odds_ratio'] > 1
    int_direction = 'both' if positively_associated else 'neither'

    flag = 1. * (combine(vec_a, vec_b) == int_direction)
    flag.name = 'interaction'
    model_interaction = get_cox_ph(surv, flag)

    return pd.Series({
        'interaction': int_direction,
        'p': LR_test(model_interaction, model_additive),
    })
Esempio n. 8
0
def get_interaction_simple(a, b, surv, int_direction='both'):
    '''
    Get the chi2 statistic comparing an interaction-only Cox model
    with the additive model for two event vectors.

    Model 1 is 'Surv(days, event) ~ a + b' fit on copies of a and b.
    Model 2 is fit on one 0./1. covariate that marks where
    combine(a, b) equals int_direction.

    Returns the value of extract_chi2(model 2, model 1).
    '''
    vec_a, vec_b = a.copy(), b.copy()
    # Names must match the variable names used in the formula string.
    vec_a.name = 'a'
    vec_b.name = 'b'
    model_additive = get_cox_ph(surv, covariates=[vec_a, vec_b],
                                formula='Surv(days, event) ~ a + b')

    flag = 1. * (combine(vec_a, vec_b) == int_direction)
    flag.name = 'interaction'
    model_interaction = get_cox_ph(surv, flag)

    return extract_chi2(model_interaction, model_additive)