Esempio n. 1
0
def extract_pc_filtered(df, pc_threshold=.2, filter_down=True):
    '''
    Standardize the '01' columns of df row-wise and hand them to extract_pc.

    df must have two-level columns; the second level carries sample-type
    codes, of which '01' and '11' are used here (presumably tumor and
    matched normal samples -- confirm against the caller).

    If '11' columns exist and filter_down is true, every row of the '01'
    data is centered and scaled with that row's mean and standard deviation
    over the '11' columns, and only the rows picked by
    true_index(p < .05) on the sorted exp_change results are kept.
    In every other case (no '11' columns, or filter_down is false) each row
    of the '01' data is standardized by its own mean and standard deviation.

    Returns the result of extract_pc on the prepared frame, called with
    pc_threshold and standardize=False.
    '''
    normals_present = '11' in df.columns.levels[1]
    if normals_present and filter_down:
        normals = df.xs('11', axis=1, level=1)
        # Per-row change statistics, ordered by their 'p' column.
        changes = df.apply(exp_change, 1).sort('p')
        center, scale = normals.mean(1), normals.std(1)
        tumors = df.xs('01', axis=1, level=1)
        # Transpose so the per-row statistics align on the column axis.
        scaled = ((tumors.T - center) / scale).T
        keep = true_index(changes.p < .05)
        prepared = scaled.ix[keep]
    else:
        # No '11' reference to normalize against, or filtering disabled.
        tumors = df.xs('01', axis=1, level=1)
        center, scale = tumors.mean(1), tumors.std(1)
        prepared = ((tumors.T - center) / scale).T
    return extract_pc(prepared, pc_threshold, standardize=False)
Esempio n. 2
0
def extract_pc_filtered(df, pc_threshold=.2, filter_down=True):
    '''
    Standardize the '01' columns of df row-wise and hand them to extract_pc.

    df must have two-level columns; the second level carries sample-type
    codes, of which '01' and '11' are used here (presumably tumor and
    matched normal samples -- confirm against the caller).

    If '11' columns exist and filter_down is true, every row of the '01'
    data is centered and scaled with that row's mean and standard deviation
    over the '11' columns, and only the rows picked by
    true_index(p < .05) on the sorted exp_change results are kept.
    In every other case (no '11' columns, or filter_down is false) each row
    of the '01' data is standardized by its own mean and standard deviation.

    Returns the result of extract_pc on the prepared frame, called with
    pc_threshold and standardize=False.
    '''
    normals_present = '11' in df.columns.levels[1]
    if normals_present and filter_down:
        normals = df.xs('11', axis=1, level=1)
        # Per-row change statistics, ordered by their 'p' column.
        changes = df.apply(exp_change, 1).sort('p')
        center, scale = normals.mean(1), normals.std(1)
        tumors = df.xs('01', axis=1, level=1)
        # Transpose so the per-row statistics align on the column axis.
        scaled = ((tumors.T - center) / scale).T
        keep = true_index(changes.p < .05)
        prepared = scaled.ix[keep]
    else:
        # No '11' reference to normalize against, or filtering disabled.
        tumors = df.xs('01', axis=1, level=1)
        center, scale = tumors.mean(1), tumors.std(1)
        prepared = ((tumors.T - center) / scale).T
    return extract_pc(prepared, pc_threshold, standardize=False)
Esempio n. 3
0
 def _get_real_features(self):
     '''
     Populate self.features, self.global_vars and self.global_loadings.

     Splits self.df with extract_features into binary, singles and real
     tables.  A "background" component is extracted (extract_pc with a
     threshold of 0) from the rows of real whose index is not in singles,
     after dropping rows with missing values.  The singles rows are then
     screened against background['pat_vec'] with pearson_pandas and only
     rows with p > 10e-5 are kept, standardized row-wise and decomposed
     with frame_svd.

     Nothing is returned; all results are stored on self.
     '''
     binary, singles, real = extract_features(self.df)

     # Rows of the real-valued table that are not part of singles.
     leftover = real.index.diff(singles.index)
     background = extract_pc(real.ix[leftover].dropna(), 0)
     background_vec = background['pat_vec']

     screened = screen_feature(background_vec, pearson_pandas, singles)
     singles = singles.ix[screened.p > 10e-5]

     # Row-wise standardization; transpose so the statistics align.
     row_mean = singles.mean(1)
     row_std = singles.std(1)
     singles = ((singles.T - row_mean) / row_std).T
     loadings, _, components = frame_svd(singles)

     self.features['binary'] = binary
     self.features['real'] = singles

     labels = ('filtered_pc1', 'filtered_pc2')
     self.global_vars['background'] = background_vec
     for rank, label in enumerate(labels):
         self.global_vars[label] = components[rank]

     self.global_loadings['background'] = background['gene_vec']
     for rank, label in enumerate(labels):
         self.global_loadings[label] = loadings[rank]
def peel_pc(df):
    '''
    Wrapper around extract_pc.

    Runs extract_pc on df shifted by -0.5; the data are deliberately not
    standardized for the PCA because of the underlying distribution of
    beta values.  The sign of the result is then chosen so that the gene
    vector does not correlate negatively with the per-row mean of df.

    Returns a tuple (gene_vec, pat_vec, pct_var) read from the extract_pc
    result.  If any step raises an Exception, the best-effort fallback
    (np.nan, df.mean(), np.nan) is returned instead.  KeyboardInterrupt and
    SystemExit are not intercepted.
    '''
    try:
        shifted = df - .5
        result = extract_pc(shifted)
        gene_vec = result['gene_vec']
        pat_vec = result['pat_vec']
        pct_var = result['pct_var']
        if gene_vec.corr(df.mean(1)) < 0:
            # Flip both vectors together so their product is unchanged.
            gene_vec = gene_vec * -1
            pat_vec = pat_vec * -1
        return gene_vec, pat_vec, pct_var
    except Exception:
        # Deliberate best-effort: fall back to the plain column means
        # rather than failing.  Only Exception is caught so that
        # interrupts and interpreter exit still propagate.
        return np.nan, df.mean(), np.nan
Esempio n. 5
0
 def _get_real_features(self):
     '''
     Populate self.features, self.global_vars and self.global_loadings.

     Splits self.df with extract_features into binary, singles and real
     tables.  A "background" component is extracted (extract_pc with a
     threshold of 0) from the rows of real whose index is not in singles,
     after dropping rows with missing values.  The singles rows are then
     screened against background['pat_vec'] with pearson_pandas and only
     rows with p > 10e-5 are kept, standardized row-wise and decomposed
     with frame_svd.

     Nothing is returned; all results are stored on self.
     '''
     binary, singles, real = extract_features(self.df)

     # Rows of the real-valued table that are not part of singles.
     leftover = real.index.diff(singles.index)
     background = extract_pc(real.ix[leftover].dropna(), 0)
     background_vec = background['pat_vec']

     screened = screen_feature(background_vec, pearson_pandas, singles)
     singles = singles.ix[screened.p > 10e-5]

     # Row-wise standardization; transpose so the statistics align.
     row_mean = singles.mean(1)
     row_std = singles.std(1)
     singles = ((singles.T - row_mean) / row_std).T
     loadings, _, components = frame_svd(singles)

     self.features['binary'] = binary
     self.features['real'] = singles

     labels = ('filtered_pc1', 'filtered_pc2')
     self.global_vars['background'] = background_vec
     for rank, label in enumerate(labels):
         self.global_vars[label] = components[rank]

     self.global_loadings['background'] = background['gene_vec']
     for rank, label in enumerate(labels):
         self.global_loadings[label] = loadings[rank]
Esempio n. 6
0
def peel_pc(df):
    '''
    Wrapper around extract_pc.

    Runs extract_pc on df shifted by -0.5; the data are deliberately not
    standardized for the PCA because of the underlying distribution of
    beta values.  The sign of the result is then chosen so that the gene
    vector does not correlate negatively with the per-row mean of df.

    Returns a tuple (gene_vec, pat_vec, pct_var) read from the extract_pc
    result.  If any step raises an Exception, the best-effort fallback
    (np.nan, df.mean(), np.nan) is returned instead.  KeyboardInterrupt and
    SystemExit are not intercepted.
    '''
    try:
        shifted = df - .5
        result = extract_pc(shifted)
        gene_vec = result['gene_vec']
        pat_vec = result['pat_vec']
        pct_var = result['pct_var']
        if gene_vec.corr(df.mean(1)) < 0:
            # Flip both vectors together so their product is unchanged.
            gene_vec = gene_vec * -1
            pat_vec = pat_vec * -1
        return gene_vec, pat_vec, pct_var
    except Exception:
        # Deliberate best-effort: fall back to the plain column means
        # rather than failing.  Only Exception is caught so that
        # interrupts and interpreter exit still propagate.
        return np.nan, df.mean(), np.nan
Esempio n. 7
0
def run_clinical_real(cancer, clinical, data_path, gene_sets,
                      survival_tests, real_variables, binary_variables,
                      data_type='expression', drop_pc=False):
    '''
    Load a data matrix, summarize each gene set with extract_pc and run the
    clinical tests against those summaries.

    data_type selects the reader:
      'expression'        read_rnaSeq, then rows are averaged after grouping
                          on the part of the row label before the first '|'
      'expression_array'  read_mrna
      'methylation'       read_methylation
    Any other value raises ValueError.

    If drop_pc is true the matrix is passed through drop_first_norm_pc
    first.  For every (name, genes) pair in gene_sets, extract_pc is run on
    the matching rows; results that are None are skipped and the rest are
    standardized (mean subtracted, divided by the standard deviation)
    before being collected into a DataFrame.  The tests come from get_tests
    with var_type='real' and are evaluated with run_tests.

    Returns locals(): a dict of every argument plus data_matrix, pc, tests,
    p_pathways and q_pathways.  The local variable names are therefore part
    of the return value and must not be renamed.
    '''
    if data_type == 'expression':
        data_matrix = read_rnaSeq(cancer, data_path)
        data_matrix = data_matrix.groupby(by=lambda n: n.split('|')[0]).mean()
    elif data_type == 'expression_array':
        data_matrix = read_mrna(cancer, data_path)
    elif data_type == 'methylation':
        data_matrix = read_methylation(cancer, data_path)
    else:
        # Fail early instead of hitting an unbound data_matrix later on.
        raise ValueError('Unknown data_type: %r' % (data_type,))
    if drop_pc:
        data_matrix = drop_first_norm_pc(data_matrix)
    pc = dict((p, extract_pc(data_matrix.ix[g])) for p, g in
              gene_sets.iteritems())
    pc = DataFrame(dict((p, (v - v.mean()) / v.std())
                        for p, v in pc.iteritems()
                        if v is not None)).T
    tests = get_tests(clinical, survival_tests, real_variables,
                      binary_variables, var_type='real')
    p_pathways, q_pathways = run_tests(tests, pc)
    # Callers receive every local by name; keep the names above stable.
    return locals()