def run_feature_matrix(df, test, fp_cutoff=.5):
    '''
    Screen every row of a feature matrix with a test object.

    Rows rejected by ``test.check_feature`` are dropped first.  When the
    test object provides a ``first_pass`` method it is applied to every
    remaining row, and only rows whose first-pass ``p`` is below
    ``fp_cutoff`` go on to ``test.full_test``.  When ``first_pass`` is
    absent, every remaining row goes straight to the full test and the
    result carries no 'Univariate' columns.

    df: feature matrix, one feature per row.
    test: object exposing ``check_feature(row)`` and ``full_test(row)``
        and, optionally, ``first_pass(row)``.  The full-test result must
        provide 'LR' and 'fmla'; the first-pass result must provide 'p'.
    fp_cutoff: first-pass p-value threshold a row has to beat in order to
        be passed on to the full test.

    Returns a DataFrame with two-level columns ('Full' / 'Univariate'),
    including the corrected columns ('Full', 'LR_q') and
    ('Univariate', 'q'), sorted by ('Full', 'LR').

    NOTE(review): relies on legacy pandas API (``.ix``,
    ``DataFrame.sort``); ``bhCorrection`` is presumably a
    Benjamini-Hochberg correction taking the number of tests as ``n`` --
    confirm against its definition.
    '''
    df = df.ix[df.apply(test.check_feature, 1)]

    # The univariate first pass is optional; ``fp`` stays None without it
    # instead of being left unbound.
    fp = None
    if hasattr(test, 'first_pass'):
        fp = df.apply(test.first_pass, 1)
        df = df[fp.p < fp_cutoff]
    # Correct for every feature that entered the screen, not only for the
    # ones that survived the first-pass cutoff.
    n_tests = len(df) if fp is None else len(fp)

    full = df.apply(test.full_test, 1)
    frames = [full[['LR', 'fmla']]]
    keys = ['Full']
    if fp is not None:
        frames.append(fp)
        keys.append('Univariate')
    res = pd.concat(frames, keys=keys, axis=1)

    if isinstance(res.index[0], tuple):  # pandas bug
        # concat can hand back a flat index of tuples; restore the
        # MultiIndex with the original level names.
        res.index = pd.MultiIndex.from_tuples(res.index,
                                              names=df.index.names)

    full_q = pd.Series(bhCorrection(res[('Full', 'LR')], n=n_tests),
                       name=('Full', 'LR_q'))
    res = res.join(full_q)
    if fp is not None:
        uni_q = pd.Series(bhCorrection(res[('Univariate', 'p')], n=n_tests),
                          name=('Univariate', 'q'))
        res = res.join(uni_q)
    return res.sort_index(axis=1).sort(columns=[('Full', 'LR')])
def run_feature_matrix(df, test, fp_cutoff=.5):
    '''
    Run a two-stage screen of a test object over the rows of a matrix.

    Stage 0 keeps only rows accepted by ``test.check_feature``.
    Stage 1 (optional) runs ``test.first_pass`` on each remaining row and
    keeps rows whose first-pass ``p`` is below ``fp_cutoff``.
    Stage 2 runs ``test.full_test`` on the survivors.

    If the test object has no ``first_pass`` method, stage 1 is skipped
    and the result contains only the 'Full' columns.

    df: feature matrix, one feature per row.
    test: object exposing ``check_feature(row)`` and ``full_test(row)``
        and, optionally, ``first_pass(row)``.  The full-test result must
        provide 'LR' and 'fmla'; the first-pass result must provide 'p'.
    fp_cutoff: first-pass p-value threshold for entering stage 2.

    Returns a DataFrame with two-level columns ('Full' / 'Univariate'),
    including the corrected columns ('Full', 'LR_q') and
    ('Univariate', 'q'), sorted by ('Full', 'LR').

    NOTE(review): relies on legacy pandas API (``.ix``,
    ``DataFrame.sort``); ``bhCorrection`` is presumably a
    Benjamini-Hochberg correction taking the number of tests as ``n`` --
    confirm against its definition.
    '''
    df = df.ix[df.apply(test.check_feature, 1)]

    # ``fp`` must exist even when the test has no first pass; previously
    # it was left unbound in that case and the function crashed below.
    has_first_pass = hasattr(test, 'first_pass')
    fp = None
    if has_first_pass:
        fp = df.apply(test.first_pass, 1)
        df = df[fp.p < fp_cutoff]
    # Number of hypotheses for the correction: every feature screened,
    # including those dropped by the first-pass cutoff.
    n_tests = len(fp) if has_first_pass else len(df)

    full = df.apply(test.full_test, 1)
    if has_first_pass:
        res = pd.concat([full[['LR', 'fmla']], fp],
                        keys=['Full', 'Univariate'], axis=1)
    else:
        res = pd.concat([full[['LR', 'fmla']]], keys=['Full'], axis=1)

    if isinstance(res.index[0], tuple):  # pandas bug
        # Rebuild the MultiIndex that concat flattened into tuples.
        res.index = pd.MultiIndex.from_tuples(res.index,
                                              names=df.index.names)

    res = res.join(
        pd.Series(bhCorrection(res[('Full', 'LR')], n=n_tests),
                  name=('Full', 'LR_q')))
    if has_first_pass:
        res = res.join(
            pd.Series(bhCorrection(res[('Univariate', 'p')], n=n_tests),
                      name=('Univariate', 'q')))
    return res.sort_index(axis=1).sort(columns=[('Full', 'LR')])
def filter_bad_pathways(self, gene_lookup):
    '''
    Blank out pathway p-values that are out-performed by a member gene.

    For every (clin_type, p_vals) pair in ``self.p_genes`` and every gene
    present both in ``p_vals.index`` and in ``gene_lookup``, each pathway
    listed under ``gene_lookup[gene]`` has its p-value set to nan when
    all of the following hold:

      * the pathway is in ``self.q_pathways[clin_type]``,
      * the gene's p-value is smaller than the pathway's p-value,
      * the gene's row sum in ``self.hit_matrix`` exceeds half of the
        pathway's row sum in ``self.meta_matrix``.

    ``self.q_pathways[clin_type]`` is then recomputed with
    ``bhCorrection`` from the updated p-values.

    gene_lookup: mapping from gene to an iterable of pathways.

    Modifies ``self.p_pathways`` and ``self.q_pathways`` in place and
    returns None.
    '''
    # NOTE(review): the q-value refresh is assumed to run once per
    # clin_type, after all of its genes have been processed -- confirm.
    for clin_type, p_vals in self.p_genes.iteritems():
        shared_genes = set(p_vals.index) & set(gene_lookup)
        for gene in shared_genes:
            for pathway in gene_lookup[gene]:
                if pathway not in self.q_pathways[clin_type]:
                    continue
                if not (p_vals[gene] < self.p_pathways[clin_type][pathway]):
                    continue
                gene_hits = self.hit_matrix.ix[gene].sum()
                pathway_hits = self.meta_matrix.ix[pathway].sum()
                if gene_hits > pathway_hits*.5:
                    self.p_pathways[clin_type][pathway] = nan
        self.q_pathways[clin_type] = bhCorrection(self.p_pathways[clin_type])
def cox_screen(df, surv, axis=1):
    '''
    Apply ``cox`` to every feature of a matrix and rank the results.

    Features are expected along the rows; pass ``axis=0`` to have the
    matrix transposed first.  Rows with at most one distinct non-null
    value are discarded before testing.  Each remaining row is passed,
    with its missing values dropped, to ``cox`` together with ``surv``.

    df: feature matrix.
    surv: survival data handed unchanged to ``cox``.
    axis: 1 if features are rows (default), 0 if they are columns.

    Returns the per-feature results with an added ('LR', 'q') column from
    ``bhCorrection`` of ('LR', 'p'), sorted by that column, with the
    column levels sorted.

    NOTE(review): ``cox`` presumably fits a Cox proportional-hazards
    model -- confirm against its definition.
    '''
    features = df.T if axis == 0 else df

    # Number of distinct non-null values observed in each row.
    n_levels = features.apply(pd.value_counts, axis=1).count(1)
    informative = n_levels[n_levels > 1].index
    features = features.ix[informative]

    fits = features.apply(lambda row: cox(row.dropna(), surv), axis=1)
    fits[('LR', 'q')] = bhCorrection(fits['LR']['p'])
    ranked = fits.sort([('LR', 'q')])
    return ranked.sortlevel(0, axis=1)
def rna_filter(cn, val, rna):
    '''
    Keep only the copy number events supported by rna expression data.

    For each row of ``cn`` whose gene is present in ``rna.index``, the
    indicator ``row == val`` is tested against that gene's expression
    with ``kruskal_pandas``.  The resulting p-values are corrected with
    ``bhCorrection`` and rows with q < .1 are returned.

    cn:  copy number matrix, should have a MultiIndex, with the gene
         name in the last level
    val: value of the copy number to test in [-2, -1, 1, 2]
    rna: expression matrix indexed by gene name
    '''
    assert val in [-2, -1, 1, 2]

    per_event = {}
    for key, row in cn.iterrows():
        gene = key[-1]
        if gene in rna.index:
            per_event[key] = kruskal_pandas(row == val, rna.ix[gene])
    change = pd.DataFrame(per_event).T

    q_vals = bhCorrection(change.p)
    return cn.ix[true_index(q_vals < .1)]
def rna_filter(cn, val, rna):
    '''
    Filter copy number events using rna expression data.

    An event is kept when the patients carrying it (``row == val``) show
    an associated change in expression of the same gene, as judged by
    ``kruskal_pandas`` followed by ``bhCorrection`` at q < .1.  Rows
    whose gene is missing from ``rna.index`` are not tested.

    cn:  copy number matrix, should have a MultiIndex, with the gene
         name in the last level
    val: value of the copy number to test in [-2, -1, 1, 2]
    rna: expression matrix indexed by gene name
    '''
    assert val in [-2, -1, 1, 2]

    def expression_test(key, row):
        # The gene name sits in the last level of the row key.
        return kruskal_pandas(row == val, rna.ix[key[-1]])

    tested = {}
    for key, row in cn.iterrows():
        if key[-1] in rna.index:
            tested[key] = expression_test(key, row)

    change = pd.DataFrame(tested).T
    q_vals = bhCorrection(change.p)
    significant = true_index(q_vals < .1)
    filtered = cn.ix[significant]
    return filtered
def lr_screen(df, surv):
    '''
    Apply ``log_rank`` to every row of a matrix and rank by p-value.

    df: feature matrix, one feature per row; cast to float before
        testing.
    surv: survival data handed unchanged to ``log_rank``.

    Returns the per-row results with an added 'q' column from
    ``bhCorrection`` of the 'p' column, sorted by 'p'.
    '''
    numeric = df.astype(float)
    results = numeric.apply(log_rank, args=(surv,), axis=1)
    results['q'] = bhCorrection(results.p)
    return results.sort('p')
def lr_screen(df, surv):
    '''
    Screen the rows of ``df`` against ``surv`` with ``log_rank``.

    df: feature matrix, one feature per row; converted to float first.
    surv: survival data, forwarded to ``log_rank`` as its extra argument.

    Returns a table of the ``log_rank`` results, one row per feature,
    extended with a 'q' column (``bhCorrection`` of 'p') and ordered by
    'p'.
    '''
    extra_args = (surv, )
    table = df.astype(float).apply(log_rank, args=extra_args, axis=1)
    q_values = bhCorrection(table.p)
    table['q'] = q_values
    table = table.sort('p')
    return table
def screen_feature(vec, test, df):
    '''
    Test one vector against every row of a feature matrix.

    vec: the vector passed as first argument to ``test``.
    test: callable invoked as ``test(vec, row)`` for each row of ``df``;
        its result must provide a 'p' entry.
    df: feature matrix, one feature per row.

    Returns a DataFrame with one row per feature, extended with a 'q'
    column (``bhCorrection`` of 'p') and sorted by 'p'.
    '''
    outcomes = {}
    for label, feature in df.iterrows():
        outcomes[label] = test(vec, feature)

    # Results are collected column-wise; transpose to one row per feature.
    table = pd.DataFrame(outcomes).T
    table['q'] = bhCorrection(table.p)
    return table.sort(columns='p')