예제 #1
0
 def get_patient_set(self, filters):
     """
     Return the labels that pass every filter and appear in the
     columns of both ``self.mut_df`` and ``self.cna_df``.

     ``filters`` is an iterable of pandas objects that are concatenated
     column-wise.  Rows with a missing value in any filter are dropped,
     and a remaining row is kept only when its values sum to zero
     (i.e. no filter flags it, assuming 0/1 or boolean filters).
     """
     combined = pd.concat(list(filters), axis=1)
     unflagged = combined.dropna().sum(1) == 0
     # H.true_index presumably yields the index labels whose value is True.
     patients = H.true_index(unflagged)
     return (patients
             .intersection(self.mut_df.columns)
             .intersection(self.cna_df.columns))
예제 #2
0
def mut_filter(df, rate, binary_cutoff=12):
    '''
    Filter out mutation features, ensuring that a feature
    is not entirely an artifact of mutation rate.

    Rows of ``df`` whose values sum to less than ``binary_cutoff`` are
    dropped first.  The remaining rows are screened against ``rate``
    with ``H.screen_feature`` / ``rev_kruskal`` and ``fc`` is applied to
    every row to obtain a ``direction`` flag.  Only rows whose direction
    is False and whose ``p`` value in the screen result exceeds .01 are
    kept; columns containing any missing value are then removed.

    Returns the filtered DataFrame.
    '''
    df = df[df.sum(1) >= binary_cutoff]
    cc = H.screen_feature(rate, rev_kruskal, df)

    def fc_apply(s):
        return fc(s, rate)

    direction = df.apply(fc_apply, axis=1)
    direction.name = 'direction'

    cc = cc.join(direction)
    cc = cc[cc.direction == False]

    # Label-based selection with .loc; the old .ix indexer was removed
    # in pandas 1.0.
    df = df.loc[H.true_index(cc.p > .01)]
    df = df.dropna(axis=1)
    return df
예제 #3
0
def process_real(df):
    '''
    Process real valued feature into binary feature.

    Each row of ``df`` is passed through ``H.to_quants(row, std=1)`` and
    the result is thresholded at ``> 0`` to give a boolean frame.  When
    ``df`` has a MultiIndex, the index tuples are flattened into single
    strings joined by ``'_'``.

    Returns the transposed boolean DataFrame.
    '''
    df_c = df.copy()
    df_c = df_c.apply(lambda s: H.to_quants(s, std=1), axis=1)
    df_c = df_c > 0
    if isinstance(df.index, pd.MultiIndex):
        # Build a concrete list: on Python 3 ``map`` is a lazy iterator,
        # which is not a reliable value to assign as an index.
        df_c.index = ['_'.join(s) for s in df_c.index]
    return df_c.T
예제 #4
0
파일: Screen.py 프로젝트: xulijunji/TCGA
def process_real(df):
    """
    Process real valued feature into binary feature.

    Every row of ``df`` is transformed with ``H.to_quants(row, std=1)``;
    values greater than zero become True and all others False.  A
    MultiIndex on ``df`` is collapsed to flat string labels by joining
    the levels of each entry with ``'_'``.

    Returns the boolean DataFrame transposed.
    """
    df_c = df.copy()
    df_c = df_c.apply(lambda s: H.to_quants(s, std=1), axis=1)
    df_c = df_c > 0
    if isinstance(df.index, pd.MultiIndex):
        # A list (not a lazy Python 3 ``map`` object) is assigned so the
        # new index is materialized immediately.
        df_c.index = ['_'.join(levels) for levels in df_c.index]
    return df_c.T
예제 #5
0
파일: Screen.py 프로젝트: xulijunji/TCGA
 def get_patient_set(self, filters):
     """
     Return the labels that pass every filter and are present in all
     of the data sets held on the instance.

     ``filters`` is an iterable of pandas objects concatenated
     column-wise.  Rows with a missing value in any filter are dropped,
     and a remaining row is kept only when its values sum to zero.
     The surviving labels are intersected, in turn, with the columns of
     ``mut_df`` and ``cna_df``, the index of ``surv.unstack()``, and the
     columns of ``rna_df`` and ``mirna_df``.
     """
     combined = pd.concat(list(filters), axis=1)
     unflagged = combined.dropna().sum(1) == 0
     # H.true_index presumably yields the index labels whose value is True.
     patients = H.true_index(unflagged)
     required_labels = (
         self.mut_df.columns,
         self.cna_df.columns,
         self.surv.unstack().index,
         self.rna_df.columns,
         self.mirna_df.columns,
     )
     for labels in required_labels:
         patients = patients.intersection(labels)
     return patients
예제 #6
0
파일: Screen.py 프로젝트: Krysia/TCGA
def mut_filter(df, rate, binary_cutoff=12):
    """
    Filter out mutation features, ensuring that a feature
    is not entirely an artifact of mutation rate.

    A row of ``df`` is retained for screening only when its rarest value
    occurs more than ``binary_cutoff`` times (constant rows are always
    discarded).  The retained rows are screened against ``rate`` with
    ``H.screen_feature`` / ``rev_kruskal`` and ``fc`` is applied to each
    row to obtain a ``direction`` flag.  A row is kept when its ``p``
    value in the screen result exceeds .01 or its direction is True.
    Columns containing any missing value are removed at the end.

    Returns the filtered DataFrame.
    """
    def get_min_count(s):
        # Count of the rarest value in the row; -1 marks a constant row
        # so that it can never exceed the cutoff.
        return s.value_counts().min() if len(s.unique()) > 1 else -1

    df = df[df.apply(get_min_count, axis=1) > binary_cutoff]
    cc = H.screen_feature(rate, rev_kruskal, df)

    direction = df.apply(lambda s: fc(s, rate), axis=1)
    direction.name = 'direction'
    cc = cc.join(direction)

    keep = (cc.p > .01) | (cc.direction == True)
    # Label-based selection with .loc; the old .ix indexer was removed
    # in pandas 1.0.
    df = df.loc[H.true_index(keep)]
    df = df.dropna(axis=1)
    return df
예제 #7
0
파일: Screen.py 프로젝트: xulijunji/TCGA
def mut_filter(df, rate, binary_cutoff=12):
    """
    Filter out mutation features, ensuring that a feature
    is not entirely an artifact of mutation rate.

    Rows of ``df`` are first restricted to those whose least frequent
    value appears more than ``binary_cutoff`` times; rows with a single
    unique value never qualify.  The survivors are screened against
    ``rate`` via ``H.screen_feature`` with ``rev_kruskal``, and ``fc``
    is evaluated per row to give a ``direction`` flag.  Rows are kept
    when the screen ``p`` value is above .01 or the direction is True,
    after which any column with a missing value is dropped.

    Returns the filtered DataFrame.
    """
    def get_min_count(s):
        # -1 for constant rows guarantees they fail the cutoff test.
        if len(s.unique()) > 1:
            return s.value_counts().min()
        return -1

    def fc_apply(s):
        return fc(s, rate)

    df = df[df.apply(get_min_count, axis=1) > binary_cutoff]
    cc = H.screen_feature(rate, rev_kruskal, df)

    direction = df.apply(fc_apply, axis=1)
    direction.name = 'direction'
    cc = cc.join(direction)

    # .loc replaces the .ix indexer, which was removed in pandas 1.0.
    selected = H.true_index((cc.p > .01) | (cc.direction == True))
    df = df.loc[selected]
    df = df.dropna(axis=1)
    return df
예제 #8
0
def corrections(vec):
    '''
    Correct p-values multiple ways along multi-index.

    ``vec`` is a Series of p-values grouped by the first level of its
    index.  The following columns are returned side by side:

    * ``uncorrected`` - the input values.
    * ``bh_within``   - ``H.bhCorrection`` applied inside each group.
    * ``bh_all``      - ``H.bhCorrection`` applied to the whole vector.
    * ``bonf_all``    - values multiplied by the total number of tests.
    * ``bonf_within`` - values multiplied by the size of their group.
    * ``two_step``    - ``bh_within`` multiplied by the number of groups.

    The multiplied values are not capped at 1.
    '''
    by_group = vec.groupby(level=0)

    bonf_all = vec * len(vec)
    bonf_within = by_group.apply(lambda s: s * len(s))

    bh_all = H.bhCorrection(vec)
    # Series.order() was removed from pandas; sort_values() is its
    # replacement.
    bh_within = by_group.apply(H.bhCorrection).sort_values()

    two_step = bh_within * len(by_group.size())
    q = pd.concat([vec, bh_within, bh_all, bonf_all, bonf_within, two_step],
                  keys=['uncorrected', 'bh_within', 'bh_all', 'bonf_all',
                        'bonf_within', 'two_step'], axis=1)
    return q
예제 #9
0
파일: Screen.py 프로젝트: xulijunji/TCGA
def corrections(vec):
    """
    Correct p-values multiple ways along multi-index.

    Groups are defined by level 0 of the index of ``vec``.  The result
    is a DataFrame with one column per correction:

    * ``uncorrected`` - ``vec`` itself.
    * ``bh_within``   - ``H.bhCorrection`` run separately on each group.
    * ``bh_all``      - ``H.bhCorrection`` run on all values together.
    * ``bonf_all``    - ``vec`` scaled by the total number of values.
    * ``bonf_within`` - each value scaled by the size of its group.
    * ``two_step``    - ``bh_within`` scaled by the number of groups.

    Scaled values are left uncapped (they may exceed 1).
    """
    grouped = vec.groupby(level=0)
    n_groups = len(grouped.size())

    bonf_all = vec * len(vec)
    bonf_within = grouped.apply(lambda s: s * len(s))

    bh_all = H.bhCorrection(vec)
    # sort_values() replaces Series.order(), which no longer exists in
    # pandas.
    bh_within = grouped.apply(H.bhCorrection).sort_values()

    two_step = bh_within * n_groups
    q = pd.concat([vec, bh_within, bh_all, bonf_all, bonf_within, two_step],
                  keys=[
                      'uncorrected', 'bh_within', 'bh_all', 'bonf_all',
                      'bonf_within', 'two_step'
                  ],
                  axis=1)
    return q
예제 #10
0
def remove_redundant_pathways(pathways, background, cutoff=.7,
                              binarize=False):
    '''
    Screens out redundant pathways with high correlation above _cutoff_.
    Pathways are ranked based on lack of correlation to the background signal.
    Then if two pathways have high correlation the lower ranked pathway is
    removed.

    pathways:   DataFrame with one pathway per row.
    background: signal passed to H.screen_feature together with
                spearman_pandas to rank the pathways.
    cutoff:     absolute correlation above which a pair counts as
                redundant.
    binarize:   when anything other than the literal False, each
                remaining row is passed through binarize_feature before
                being returned.

    Returns the non-redundant pathways (optionally binarized).
    '''
    bg = H.screen_feature(background, spearman_pandas, pathways)
    # Correlate pathways in the reverse of the screen ordering.
    # .loc replaces the .ix indexer, which was removed in pandas 1.0.
    dd = pathways.loc[bg.index[::-1]].T.corr()
    # Keep only the strict upper triangle so every pair appears once,
    # with the earlier pathway as the row and the later as the column.
    dd = pd.DataFrame(np.triu(dd, 1), dd.index, dd.index)
    dd = dd.replace(0, np.nan).stack()
    # For each pair above the cutoff, the column-side (later) pathway is
    # the one marked for removal.
    drop = dd[dd.abs() > cutoff].index.get_level_values(1)
    # Index.difference replaces the removed Index.diff alias.
    pathways_to_keep = pathways.index.difference(drop.unique())
    pathways = pathways.loc[pathways_to_keep]
    # Identity check kept on purpose: only the literal False skips
    # binarization, matching the historical behavior.
    if binarize is False:
        return pathways
    return pathways.apply(binarize_feature, 1)