Exemple #1
0
def get_global_vars(data_path, cancer, patients=None):
    '''
    Get compiled DataFrame of global molecular variables from Firehose
    data.

    The first two components ('pc1', 'pc2') returned by frame_svd for the
    mRNASeq and methylation matrices are concatenated column-wise with the
    copy-number and mutation rates, under the top-level column keys
    'mRNASeq', 'methylation', 'cna' and 'mutation'.  Columns that are
    entirely NaN are dropped.  The result is expected to carry patient
    barcodes on the index (per the original description; not verifiable
    from this function alone).

    data_path: location of the Firehose data, forwarded to the readers.
    cancer: cancer identifier, forwarded to the readers.
    patients: optional patient subset, forwarded to the readers.

    Reading or decomposing the expression / methylation data is best
    effort: on failure that data type contributes an empty frame.  Errors
    from get_cna_rates and get_mutation_rates are not caught.
    '''
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        data_matrix = FH.read_rnaSeq(data_path, cancer, patients)
        U, S, vH = frame_svd(data_matrix)
        exp_pc = pd.DataFrame({'pc1': vH[0], 'pc2': vH[1]})
    except Exception:
        exp_pc = pd.DataFrame()

    try:
        data_matrix = read_methylation(data_path, cancer, patients)
        U, S, vH = frame_svd(data_matrix)
        meth_pc = pd.DataFrame({'pc1': vH[0], 'pc2': vH[1]})
    except Exception:
        meth_pc = pd.DataFrame()

    # The age-signal step is currently disabled: the placeholders below are
    # plain strings, so the join is expected to fail and meth_pc is left
    # untouched.  Kept so the call can be re-enabled in one place.
    try:
        meth_age, amar = 'FAIL', 'FAIL'
        # meth_age, amar = get_age_signal(data_path, cancer)
        meth_pc = meth_pc.join(meth_age).join(amar)
        print('Should probably check this out')
    except Exception:
        pass

    cna_rates = get_cna_rates(data_path, cancer, patients)
    mutation_rates = get_mutation_rates(data_path, cancer, patients)

    gv = pd.concat([exp_pc, meth_pc, cna_rates, mutation_rates],
                   keys=['mRNASeq', 'methylation', 'cna', 'mutation'],
                   axis=1)
    gv = gv.dropna(how='all', axis=1)
    return gv
def pathway_mutation_section_exp(cancer, gene_sets, cutoff=.25):
    '''
    Build the report sub-section on pathway-level expression associations.

    The full pathway table is written to '<report_folder>/pathway_table.csv'
    (sorted by 'survival' when that column exists).  The displayed table is
    restricted to pathways with at least one value below .25 in
    cancer.q_pathways, sorted by 'survival' when available and truncated to
    20 rows.  For every displayed cell in the 'age' / 'survival' columns
    whose value is below cutoff, a detail figure is drawn and attached to
    that cell.

    cancer: object providing report_folder, q_pathways, data_matrix and
        clinical.
    gene_sets: mapping from pathway name to the genes indexing
        cancer.data_matrix.
    cutoff: threshold below which a table cell gets a detail figure.

    Returns the sub-section element; when no pathway passes the filter this
    is an 'Expressed Pathways' sub-section holding an empty paragraph.
    '''
    #Format data for report
    path = cancer.report_folder + '/'
    pathway_table_file = path + 'pathway_table.csv'
    pathway_table = format_pathway_table_exp(cancer, gene_sets)
    if 'survival' in pathway_table:
        # sort() returns a new frame; keep it so the CSV is written sorted.
        pathway_table = pathway_table.sort(columns='survival')
    pathway_table.to_csv(pathway_table_file)
    # NOTE(review): this filter is hard-coded to .25 and ignores `cutoff`;
    # confirm whether the two thresholds are meant to be independent.
    keepers = cancer.q_pathways[(cancer.q_pathways < .25).sum(1) > 0].index
    pathway_table = pathway_table.ix[keepers]
    if 'survival' in pathway_table:
        pathway_table = pathway_table.sort(columns='survival')
    pathway_table = pathway_table.head(20)
    # Bail out before the R conversion: nothing to convert for an empty table.
    if len(pathway_table) == 0:
        return nz.addTo(nz.newSubSection('Expressed Pathways'), nz.newParagraph(''))
    # NaN cells are replaced by the placeholder 1.23 before conversion
    # (presumably because the R side cannot take NaN here; TODO confirm).
    pathway_table_r = com.convert_to_r_dataframe(pathway_table.replace(nan, 1.23)) #@UndefinedVariable

    #Overview
    tableCaption1 = ('Association of pathway level expression patterns with patient ' +
                     'clinical features.')
    table1 = nz.newTable(pathway_table_r, tableCaption1, file=pathway_table_file,
                         significantDigits=2)

    #Fill in the details
    # Zero-based positions; the table is addressed one-based below.
    pathway_pos = dict((p,i) for i,p in enumerate(pathway_table.index))
    col_pos = dict((c,i) for i,c in enumerate(pathway_table.columns))

    #age scatter plots
    if 'age' in pathway_table:
        age_q = pathway_table['age']
        for p in age_q[age_q < cutoff].index:
            fig_file = path + FIG_EXT + p + '_age.png'
            draw_pathway_age_scatter(p, cancer, fig_file)
            # NOTE(review): caption speaks of mutation although this section
            # is about expression; wording kept apart from the missing space.
            age_fig1 = nz.newFigure(fig_file, 'Age of patients with or without ' +
                                              'mutation to pathway.')
            result1 = nz.addTo(nz.newResult('', isSignificant='TRUE'),
                               nz.addTo(nz.newSection(p), age_fig1))
            table1 = nz.addTo(table1, result1, row=pathway_pos[p]+1,
                              column=col_pos['age']+1)

    #survival curves
    if 'survival' in pathway_table:
        survival_q = pathway_table['survival']
        for p in survival_q[survival_q < cutoff].index:
            fig_file = path + FIG_EXT + p + '_survival.png'
            data_frame = cancer.data_matrix.ix[gene_sets[p]].dropna()
            # Center and scale along axis 1 before the decomposition.
            U,S,vH = frame_svd(((data_frame.T - data_frame.mean(1)) / data_frame.std(1)).T)

            # 0 / 1 / 2 for more than one std below, within, or more than
            # one std above zero on the first component.
            strat = (vH[0] > vH[0].std()).astype(int) - (vH[0] < -vH[0].std()) + 1
            draw_survival_curves(cancer.clinical, Series(strat, name='pc'),
                                 labels=['low','mid','high'], filename=fig_file)
            sv_fig1 = nz.newFigure(fig_file, 'Survival of patients with ' +
                                             'varying levels of pathway expression.')
            fig_file2 = path + FIG_EXT + p + '.svg'
            draw_pathway_eig_bar(U, fig_file2)
            sv_fig_2 = nz.newFigure(fig_file2, 'Loading for first eigen-patient.')
            result1 = nz.addTo(nz.newResult('', isSignificant='TRUE'),
                               nz.addTo(nz.newSection(p), sv_fig1, sv_fig_2))
            table1 = nz.addTo(table1, result1, row=pathway_pos[p]+1,
                              column=col_pos['survival']+1)

    # NOTE(review): title differs from the 'Expressed Pathways' title used
    # for the empty case above; kept as-is, confirm which one is intended.
    section = nz.addTo(nz.newSubSection('Pathway Mutations'), table1)
    return section
def add_eig_bar(pathway, cancer, table, pos, fig_path):
    '''
    Attach the first eigen-patient loading bar plot of a pathway to a cell
    of a report table.

    pathway: pathway name; key into cancer.gene_sets and part of the
        figure file name.
    cancer: object providing report_folder, data_matrix and gene_sets.
    table: report table element the figure is attached to.
    pos: (row, column) pair addressing the target cell.
    fig_path: unused; kept so existing callers keep working.

    Returns the table element with the result attached.
    '''
    fig_file = cancer.report_folder + '/' + FIG_EXT + pathway + '.svg'
    # Only (re)draw the plot when it has not been produced already.
    if not os.path.isfile(fig_file):
        data_frame = cancer.data_matrix.ix[cancer.gene_sets[pathway]].dropna()
        # Center and scale along axis 1 before the decomposition.
        U,S,vH = frame_svd(((data_frame.T - data_frame.mean(1)) / data_frame.std(1)).T)
        draw_pathway_eig_bar(U, fig_file)
    sv_fig = nz.newFigure(fig_file, 'Loading for first eigen-patient.')
    result1 = nz.addTo(nz.newResult('', isSignificant='TRUE'),
                       nz.addTo(nz.newSection(pathway), sv_fig))
    table = nz.addTo(table, result1, row=pos[0], column=pos[1])
    # Hand the updated table back; rebinding the local alone is invisible
    # to the caller.
    return table
Exemple #4
0
def get_global_vars(data_path, cancer, patients=None):
    '''
    Get compiled DataFrame of global molecular variables from Firehose
    data.

    The first two components ('pc1', 'pc2') returned by frame_svd for the
    mRNASeq and methylation matrices are concatenated column-wise with the
    copy-number and mutation rates, under the top-level column keys
    'mRNASeq', 'methylation', 'cna' and 'mutation'.  Columns that are
    entirely NaN are dropped.  The result is expected to carry patient
    barcodes on the index (per the original description; not verifiable
    from this function alone).

    data_path: location of the Firehose data, forwarded to the readers.
    cancer: cancer identifier, forwarded to the readers.
    patients: optional patient subset, forwarded to the readers.

    Reading or decomposing the expression / methylation data is best
    effort: on failure that data type contributes an empty frame.  Errors
    from get_cna_rates and get_mutation_rates are not caught.
    '''
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        data_matrix = FH.read_rnaSeq(data_path, cancer, patients)
        U, S, vH = frame_svd(data_matrix)
        exp_pc = pd.DataFrame({'pc1': vH[0], 'pc2': vH[1]})
    except Exception:
        exp_pc = pd.DataFrame()

    try:
        data_matrix = read_methylation(data_path, cancer, patients)
        U, S, vH = frame_svd(data_matrix)
        meth_pc = pd.DataFrame({'pc1': vH[0], 'pc2': vH[1]})
    except Exception:
        meth_pc = pd.DataFrame()

    # The age-signal step is currently disabled: the placeholders below are
    # plain strings, so the join is expected to fail and meth_pc is left
    # untouched.  Kept so the call can be re-enabled in one place.
    try:
        meth_age, amar = 'FAIL', 'FAIL'
        # meth_age, amar = get_age_signal(data_path, cancer)
        meth_pc = meth_pc.join(meth_age).join(amar)
        print('Should probably check this out')
    except Exception:
        pass

    cna_rates = get_cna_rates(data_path, cancer, patients)
    mutation_rates = get_mutation_rates(data_path, cancer, patients)

    gv = pd.concat([exp_pc, meth_pc, cna_rates, mutation_rates],
                   keys=['mRNASeq', 'methylation', 'cna', 'mutation'],
                   axis=1)
    gv = gv.dropna(how='all', axis=1)
    return gv
 def _get_meta_features(self, gene_sets, filter_down):
     '''
     Derive pathway-level meta-features and their first two components.

     Pathway scores come from extract_geneset_pcs.  When global_vars
     exposes a 'background' attribute, only pathways whose screen_feature
     result `p` exceeds 10e-5 against it are kept.  The scores are then
     centered and scaled along axis 1 and decomposed with frame_svd.

     Sets loadings, pct_var, pathways and features['pathways'] on self,
     plus the 'pathway_pc1' / 'pathway_pc2' entries of global_vars and
     global_loadings.  Returns nothing.
     '''
     self.loadings, self.pct_var, pathways = extract_geneset_pcs(
         self.df, gene_sets, filter_down)

     if hasattr(self.global_vars, 'background'):
         screen = screen_feature(self.global_vars.background,
                                 pearson_pandas, pathways)
         unrelated = screen.p > 10e-5
         pathways = pathways.ix[unrelated]

     centered = pathways.T - pathways.mean(1)
     pathways = (centered / pathways.std(1)).T
     left, _, right = frame_svd(pathways)

     self.pathways = pathways
     self.features['pathways'] = pathways

     labels = ('pathway_pc1', 'pathway_pc2')
     for idx, label in enumerate(labels):
         self.global_vars[label] = right[idx]
     for idx, label in enumerate(labels):
         self.global_loadings[label] = left[idx]
Exemple #6
0
 def _get_meta_features(self, gene_sets, filter_down):
     '''
     Derive pathway-level meta-features and their first two components.

     Pathway scores come from extract_geneset_pcs.  When global_vars
     exposes a 'background' attribute, only pathways whose screen_feature
     result `p` exceeds 10e-5 against it are kept.  The scores are then
     centered and scaled along axis 1 and decomposed with frame_svd.

     Sets loadings, pct_var, pathways and features['pathways'] on self,
     plus the 'pathway_pc1' / 'pathway_pc2' entries of global_vars and
     global_loadings.  Returns nothing.
     '''
     self.loadings, self.pct_var, pathways = extract_geneset_pcs(
         self.df, gene_sets, filter_down)

     if hasattr(self.global_vars, 'background'):
         screen = screen_feature(self.global_vars.background,
                                 pearson_pandas, pathways)
         unrelated = screen.p > 10e-5
         pathways = pathways.ix[unrelated]

     centered = pathways.T - pathways.mean(1)
     pathways = (centered / pathways.std(1)).T
     left, _, right = frame_svd(pathways)

     self.pathways = pathways
     self.features['pathways'] = pathways

     labels = ('pathway_pc1', 'pathway_pc2')
     for idx, label in enumerate(labels):
         self.global_vars[label] = right[idx]
     for idx, label in enumerate(labels):
         self.global_loadings[label] = left[idx]
Exemple #7
0
def create_figure_real(cancer, fig_type, vec, file_name):
    '''
    Draw the figure matching fig_type for a real-valued vector.

    fig_type is checked, in order, against cancer.survival_tests,
    cancer.real_variables, cancer.binary_variables and the literal
    'pathway_bar'; the first match decides which plot is written to
    file_name.  Nothing is drawn when no case matches.  Always returns
    None.
    '''
    if fig_type in cancer.survival_tests:
        # -1 / 0 / 1 for values below -1, in between, or above 1.
        below = vec < -1
        above = vec > 1
        hit_vec = -1*below + above
        test_args = cancer.survival_tests[fig_type]
        draw_survival_curves(cancer.clinical, hit_vec, filename=file_name,
                             labels=['low','normal','high'], **test_args)
        return

    if fig_type in cancer.real_variables:
        clinical_values = cancer.clinical[fig_type].astype(float)
        series_scatter(vec, clinical_values, filename=file_name)
        return

    if fig_type in cancer.binary_variables:
        violin_plot_pandas(cancer.clinical[fig_type], vec, filename=file_name)
        return

    if fig_type == 'pathway_bar':
        # vec.name identifies the pathway whose genes are decomposed.
        genes = cancer.gene_sets[vec.name]
        expression = cancer.data_matrix.ix[genes].dropna()
        loadings, _, _ = frame_svd(expression)
        draw_pathway_eig_bar(loadings, file_name)
 def _get_real_features(self):
     '''
     Split self.df into feature groups and compute filtered components.

     extract_features yields the binary, single and real feature sets.
     A background component is taken (extract_pc, index 0) from the real
     features that are not among the singles; singles whose
     screen_feature result `p` against that background exceeds 10e-5 are
     kept, centered and scaled along axis 1, and decomposed with
     frame_svd.

     Sets features['binary'] and features['real'] on self, plus the
     'background', 'filtered_pc1' and 'filtered_pc2' entries of
     global_vars and global_loadings.  Returns nothing.
     '''
     binary, singles, real = extract_features(self.df)

     remainder = real.index.diff(singles.index)
     background = extract_pc(real.ix[remainder].dropna(), 0)

     screen = screen_feature(background['pat_vec'], pearson_pandas, singles)
     singles = singles.ix[screen.p > 10e-5]

     centered = singles.T - singles.mean(1)
     singles = (centered / singles.std(1)).T
     left, _, right = frame_svd(singles)

     self.features['binary'] = binary
     self.features['real'] = singles

     pc_labels = ('filtered_pc1', 'filtered_pc2')
     self.global_vars['background'] = background['pat_vec']
     for idx, label in enumerate(pc_labels):
         self.global_vars[label] = right[idx]
     self.global_loadings['background'] = background['gene_vec']
     for idx, label in enumerate(pc_labels):
         self.global_loadings[label] = left[idx]
Exemple #9
0
 def _get_real_features(self):
     '''
     Split self.df into feature groups and compute filtered components.

     extract_features yields the binary, single and real feature sets.
     A background component is taken (extract_pc, index 0) from the real
     features that are not among the singles; singles whose
     screen_feature result `p` against that background exceeds 10e-5 are
     kept, centered and scaled along axis 1, and decomposed with
     frame_svd.

     Sets features['binary'] and features['real'] on self, plus the
     'background', 'filtered_pc1' and 'filtered_pc2' entries of
     global_vars and global_loadings.  Returns nothing.
     '''
     binary, singles, real = extract_features(self.df)

     remainder = real.index.diff(singles.index)
     background = extract_pc(real.ix[remainder].dropna(), 0)

     screen = screen_feature(background['pat_vec'], pearson_pandas, singles)
     singles = singles.ix[screen.p > 10e-5]

     centered = singles.T - singles.mean(1)
     singles = (centered / singles.std(1)).T
     left, _, right = frame_svd(singles)

     self.features['binary'] = binary
     self.features['real'] = singles

     pc_labels = ('filtered_pc1', 'filtered_pc2')
     self.global_vars['background'] = background['pat_vec']
     for idx, label in enumerate(pc_labels):
         self.global_vars[label] = right[idx]
     self.global_loadings['background'] = background['gene_vec']
     for idx, label in enumerate(pc_labels):
         self.global_loadings[label] = left[idx]
 def _calc_global_pcs(self, drop_pc1=False):
     '''
     Normalize the data and store its first two principal components.

     Works on the '01' cross-section of the second column level of
     self.df (presumably a sample-type code; confirm against the data
     loader).  The selection is centered and scaled along axis 1 and
     decomposed with frame_svd; the first two components are stored
     under 'pc1' / 'pc2' in global_vars and global_loadings.

     drop_pc1: when exactly True, return the data rebuilt from the
         decomposition with the first singular value zeroed instead of
         the plain normalized data.

     Returns the normalized (or reconstructed) data.
     '''
     selected = self.df.xs('01', axis=1, level=1)
     centered = selected.T - selected.mean(1)
     norm = (centered / selected.std(1)).T

     left, sing_vals, right = frame_svd(norm)

     labels = ('pc1', 'pc2')
     for idx, label in enumerate(labels):
         self.global_vars[label] = right[idx]
     for idx, label in enumerate(labels):
         self.global_loadings[label] = left[idx]

     if drop_pc1 is not True:
         return norm

     # Zero the leading singular value, then multiply the factors back.
     damped = sing_vals.copy()
     damped[0] = 0
     return left.dot(pd.DataFrame(diag(damped)).dot(right.T))
Exemple #11
0
 def _calc_global_pcs(self, drop_pc1=False):
     '''
     Normalize the data and store its first two principal components.

     Works on the '01' cross-section of the second column level of
     self.df (presumably a sample-type code; confirm against the data
     loader).  The selection is centered and scaled along axis 1 and
     decomposed with frame_svd; the first two components are stored
     under 'pc1' / 'pc2' in global_vars and global_loadings.

     drop_pc1: when exactly True, return the data rebuilt from the
         decomposition with the first singular value zeroed instead of
         the plain normalized data.

     Returns the normalized (or reconstructed) data.
     '''
     selected = self.df.xs('01', axis=1, level=1)
     centered = selected.T - selected.mean(1)
     norm = (centered / selected.std(1)).T

     left, sing_vals, right = frame_svd(norm)

     labels = ('pc1', 'pc2')
     for idx, label in enumerate(labels):
         self.global_vars[label] = right[idx]
     for idx, label in enumerate(labels):
         self.global_loadings[label] = left[idx]

     if drop_pc1 is not True:
         return norm

     # Zero the leading singular value, then multiply the factors back.
     damped = sing_vals.copy()
     damped[0] = 0
     return left.dot(pd.DataFrame(diag(damped)).dot(right.T))