Ejemplo n.º 1
0
def run_feature_matrix(df, test, fp_cutoff=.5):
    '''
    Screen every row of df with test and collect the results in one table.

    df: feature matrix, one feature per row
    test: object providing check_feature(row) and full_test(row); it may
        also provide first_pass(row), whose result must have a 'p' entry
    fp_cutoff: when test has a first_pass, only rows whose first-pass p
        is below this value are handed to full_test

    Returns a DataFrame with two-level columns.  'Full' holds the 'LR' and
    'fmla' results of full_test plus the corrected 'LR_q'.  'Univariate'
    is present only when test defines first_pass and holds its results
    plus the corrected 'q'.  Rows are sorted by ('Full', 'LR').
    '''
    df = df.loc[df.apply(test.check_feature, axis=1)]
    # Both corrections use the number of features that passed check_feature,
    # i.e. the number of tests before any first-pass filtering.
    n_tests = len(df)

    # The first pass is optional; without it every feature gets the full test.
    fp = None
    if hasattr(test, 'first_pass'):
        fp = df.apply(test.first_pass, axis=1)
        df = df[fp.p < fp_cutoff]
    full = df.apply(test.full_test, axis=1)

    parts, keys = [full[['LR', 'fmla']]], ['Full']
    if fp is not None:
        parts.append(fp)
        keys.append('Univariate')
    res = pd.concat(parts, keys=keys, axis=1)

    # concat can hand back a flat index of tuples instead of a MultiIndex
    if len(res.index) and isinstance(res.index[0], tuple):
        res.index = pd.MultiIndex.from_tuples(res.index,
                                              names=df.index.names)

    # NOTE(review): bhCorrection is presumably a Benjamini-Hochberg
    # correction taking the total number of tests as n -- confirm.
    res = res.join(pd.Series(bhCorrection(res[('Full', 'LR')], n=n_tests),
                             name=('Full', 'LR_q')))
    if fp is not None:
        res = res.join(
            pd.Series(bhCorrection(res[('Univariate', 'p')], n=n_tests),
                      name=('Univariate', 'q')))
    return res.sort_index(axis=1).sort_values(by=[('Full', 'LR')])
Ejemplo n.º 2
0
def run_feature_matrix(df, test, fp_cutoff=.5):
    '''
    Screen every row of df with test and collect the results in one table.

    df: feature matrix, one feature per row
    test: object providing check_feature(row) and full_test(row); it may
        also provide first_pass(row), whose result must have a 'p' entry
    fp_cutoff: when test has a first_pass, only rows whose first-pass p
        is below this value are handed to full_test

    Returns a DataFrame with two-level columns.  'Full' holds the 'LR' and
    'fmla' results of full_test plus the corrected 'LR_q'.  'Univariate'
    is present only when test defines first_pass and holds its results
    plus the corrected 'q'.  Rows are sorted by ('Full', 'LR').
    '''
    df = df.loc[df.apply(test.check_feature, axis=1)]
    # Both corrections use the number of features that passed check_feature,
    # i.e. the number of tests before any first-pass filtering.
    n_tests = len(df)

    # The first pass is optional; without it every feature gets the full test.
    fp = None
    if hasattr(test, 'first_pass'):
        fp = df.apply(test.first_pass, axis=1)
        df = df[fp.p < fp_cutoff]
    full = df.apply(test.full_test, axis=1)

    parts, keys = [full[['LR', 'fmla']]], ['Full']
    if fp is not None:
        parts.append(fp)
        keys.append('Univariate')
    res = pd.concat(parts, keys=keys, axis=1)

    # concat can hand back a flat index of tuples instead of a MultiIndex
    if len(res.index) and isinstance(res.index[0], tuple):
        res.index = pd.MultiIndex.from_tuples(res.index,
                                              names=df.index.names)

    # NOTE(review): bhCorrection is presumably a Benjamini-Hochberg
    # correction taking the total number of tests as n -- confirm.
    res = res.join(pd.Series(bhCorrection(res[('Full', 'LR')], n=n_tests),
                             name=('Full', 'LR_q')))
    if fp is not None:
        res = res.join(
            pd.Series(bhCorrection(res[('Univariate', 'p')], n=n_tests),
                      name=('Univariate', 'q')))
    return res.sort_index(axis=1).sort_values(by=[('Full', 'LR')])
Ejemplo n.º 3
0
 def filter_bad_pathways(self, gene_lookup):
     '''
     Set pathway p-values to nan when one member gene outperforms the
     pathway, then recompute the pathway q-values.

     gene_lookup: mapping from a gene to the pathways that contain it

     For each (clin_type, p_vals) pair in self.p_genes, and each gene that
     appears both in p_vals.index and in gene_lookup, a pathway's entry in
     self.p_pathways[clin_type] is replaced by nan when all of these hold:
       * the pathway is present in self.q_pathways[clin_type],
       * the gene's p-value is smaller than the pathway's p-value,
       * the sum of the gene's entry in self.hit_matrix exceeds half the
         sum of the pathway's entry in self.meta_matrix.
     self.q_pathways[clin_type] is then rebuilt with bhCorrection.
     self.p_pathways and self.q_pathways are modified in place; nothing is
     returned.
     '''
     # NOTE(review): .items() assumes p_genes is a dict or a pandas object
     # recent enough to offer items() -- confirm against the class.
     for clin_type, p_vals in self.p_genes.items():
         # only genes that have both a p-value and a pathway annotation
         for gene in set(p_vals.index).intersection(gene_lookup):
             for pathway in gene_lookup[gene]:
                 if ((pathway in self.q_pathways[clin_type]) and
                     (p_vals[gene] < self.p_pathways[clin_type][pathway]) and
                     (self.hit_matrix.loc[gene].sum() >
                      self.meta_matrix.loc[pathway].sum() * .5)):
                     self.p_pathways[clin_type][pathway] = nan
         self.q_pathways[clin_type] = bhCorrection(self.p_pathways[clin_type])
Ejemplo n.º 4
0
def cox_screen(df, surv, axis=1):
    '''
    Run cox on every feature of df against surv and rank the results.

    df: feature matrix
    surv: survival data, passed unchanged to cox
    axis: 1 (default) when features are the rows of df; 0 when they are
        the columns, in which case df is transposed first

    Features with fewer than two distinct non-missing values are skipped.
    Missing values are dropped from each feature before it is passed to
    cox.

    Returns the per-feature cox results with an added ('LR', 'q') column
    computed by bhCorrection from ('LR', 'p'), rows sorted by that column
    and columns sorted by their first level.
    '''
    if axis == 0:
        df = df.T
    # A feature needs at least two distinct observed values to be testable.
    # A boolean mask (rather than a label list) keeps rows one-to-one even
    # when the index has repeated labels.
    n_levels = df.apply(pd.Series.nunique, axis=1)
    df = df.loc[n_levels > 1]
    rr = df.apply(lambda s: cox(s.dropna(), surv), axis=1)

    rr[('LR', 'q')] = bhCorrection(rr[('LR', 'p')])
    rr = rr.sort_values(by=[('LR', 'q')])
    rr = rr.sort_index(axis=1, level=0)
    return rr
Ejemplo n.º 5
0
def cox_screen(df, surv, axis=1):
    '''
    Run cox on every feature of df against surv and rank the results.

    df: feature matrix
    surv: survival data, passed unchanged to cox
    axis: 1 (default) when features are the rows of df; 0 when they are
        the columns, in which case df is transposed first

    Features with fewer than two distinct non-missing values are skipped.
    Missing values are dropped from each feature before it is passed to
    cox.

    Returns the per-feature cox results with an added ('LR', 'q') column
    computed by bhCorrection from ('LR', 'p'), rows sorted by that column
    and columns sorted by their first level.
    '''
    if axis == 0:
        df = df.T
    # A feature needs at least two distinct observed values to be testable.
    # A boolean mask (rather than a label list) keeps rows one-to-one even
    # when the index has repeated labels.
    n_levels = df.apply(pd.Series.nunique, axis=1)
    df = df.loc[n_levels > 1]
    rr = df.apply(lambda s: cox(s.dropna(), surv), axis=1)

    rr[('LR', 'q')] = bhCorrection(rr[('LR', 'p')])
    rr = rr.sort_values(by=[('LR', 'q')])
    rr = rr.sort_index(axis=1, level=0)
    return rr
Ejemplo n.º 6
0
def rna_filter(cn, val, rna, q_cutoff=.1):
    '''
    Filter copy number events with rna expression data.
    Here we test whether the event is associated with a subsequent
    change in expression in those patients.

    cn: copy number matrix, should have a MultiIndex, with the gene name
        in the last level
    val: value of the copy number to test in [-2, -1, 1, 2]
    rna: expression matrix indexed by gene name; rows of cn whose gene is
        not in rna.index are not tested
    q_cutoff: an event is kept when its corrected value from bhCorrection
        is below this threshold (default .1)

    Returns the rows of cn that pass the cutoff; an empty selection of cn
    when no row of cn could be tested.
    '''
    assert val in [-2, -1, 1, 2]
    # One test per event: patients carrying exactly `val` versus the rest,
    # compared on the expression of the event's gene.
    change = pd.DataFrame({
        g: kruskal_pandas(vec == val, rna.loc[g[-1]])
        for g, vec in cn.iterrows() if g[-1] in rna.index
    }).T
    if change.empty:
        # Nothing was testable, so there is no 'p' column to correct.
        return cn.iloc[:0]
    q_vals = bhCorrection(change['p'])
    # NOTE(review): true_index presumably returns the index labels where
    # the mask is True -- confirm.
    filtered = cn.loc[true_index(q_vals < q_cutoff)]
    return filtered
Ejemplo n.º 7
0
def rna_filter(cn, val, rna, q_cutoff=.1):
    '''
    Filter copy number events with rna expression data.
    Here we test whether the event is associated with a subsequent
    change in expression in those patients.

    cn: copy number matrix, should have a MultiIndex, with the gene name
        in the last level
    val: value of the copy number to test in [-2, -1, 1, 2]
    rna: expression matrix indexed by gene name; rows of cn whose gene is
        not in rna.index are not tested
    q_cutoff: an event is kept when its corrected value from bhCorrection
        is below this threshold (default .1)

    Returns the rows of cn that pass the cutoff; an empty selection of cn
    when no row of cn could be tested.
    '''
    assert val in [-2, -1, 1, 2]
    # One test per event: patients carrying exactly `val` versus the rest,
    # compared on the expression of the event's gene.
    change = pd.DataFrame({
        g: kruskal_pandas(vec == val, rna.loc[g[-1]])
        for g, vec in cn.iterrows() if g[-1] in rna.index
    }).T
    if change.empty:
        # Nothing was testable, so there is no 'p' column to correct.
        return cn.iloc[:0]
    q_vals = bhCorrection(change['p'])
    # NOTE(review): true_index presumably returns the index labels where
    # the mask is True -- confirm.
    filtered = cn.loc[true_index(q_vals < q_cutoff)]
    return filtered
Ejemplo n.º 8
0
def lr_screen(df, surv):
    '''
    Apply log_rank to every row of df against surv and rank the results.

    df: feature matrix, one feature per row; values are cast to float
    surv: survival data, passed unchanged to log_rank

    Returns the per-row log_rank results with an added 'q' column computed
    by bhCorrection from the 'p' column, sorted by 'p'.
    '''
    rr = df.astype(float).apply(log_rank, args=(surv,), axis=1)
    rr['q'] = bhCorrection(rr['p'])
    return rr.sort_values(by='p')
Ejemplo n.º 9
0
def lr_screen(df, surv):
    '''
    Apply log_rank to every row of df against surv and rank the results.

    df: feature matrix, one feature per row; values are cast to float
    surv: survival data, passed unchanged to log_rank

    Returns the per-row log_rank results with an added 'q' column computed
    by bhCorrection from the 'p' column, sorted by 'p'.
    '''
    rr = df.astype(float).apply(log_rank, args=(surv,), axis=1)
    rr['q'] = bhCorrection(rr['p'])
    return rr.sort_values(by='p')
Ejemplo n.º 10
0
def screen_feature(vec, test, df):
    '''
    Test vec against every row of df and rank the results.

    vec: the vector passed as first argument to every test call
    test: callable invoked as test(vec, row); its result must have a 'p'
        entry
    df: matrix whose rows are screened against vec

    Returns one row of test results per row of df, with an added 'q'
    column computed by bhCorrection from the 'p' column, sorted by 'p'.
    '''
    s = pd.DataFrame({f: test(vec, feature)
                      for f, feature in df.iterrows()}).T
    s['q'] = bhCorrection(s['p'])
    return s.sort_values(by='p')