Beispiel #1
0
def get_cna_rates(data_path, cancer, patients=None):
    """
    Get copy-number aberration rates from GISTIC processing pipeline.

    This function depends on the current Firehose output of this program
    as of July 2013.

    data_path and cancer are passed unchanged to the FH.get_gistic_*
    loaders.  If patients is given, the result is restricted to those row
    labels; labels that are not present, and rows with a missing value in
    any column, are dropped.

    Returns a DataFrame with one column per rate:
      gene_amp / gene_amp_high      counts of gene values >= 1 / == 2
      gene_del / gene_del_homo      counts of gene values <= -1 / <= -2
      lesion_amp / lesion_amp_high  counts of 'Amplification' values >= 1 / == 2
      lesion_del / lesion_del_homo  counts of 'Deletion' values <= -1 / == -2
      chrom_instability             mean absolute arm-level value
    Each statistic is reduced column-wise over its source table
    (presumably one column per patient -- confirm against the FH loaders).
    """
    gistic = FH.get_gistic_gene_matrix(data_path, cancer)
    amp_gene_all = (gistic >= 1).astype(int).sum()
    amp_gene_high = (gistic == 2).astype(int).sum()
    del_gene_all = (gistic <= -1).astype(int).sum()
    del_gene_homo = (gistic <= -2).astype(int).sum()

    # .ix was removed in pandas 1.0; these are pure label lookups, so .loc
    # is the direct replacement.
    lesions = FH.get_gistic_lesions(data_path, cancer)
    amplifications = lesions.loc['Amplification']
    deletions = lesions.loc['Deletion']
    amp_lesion_all = (amplifications >= 1).sum()
    amp_lesion_high = (amplifications == 2).sum()
    del_lesion_all = (deletions <= -1).sum()
    del_lesion_homo = (deletions == -2).sum()

    arm_cn = FH.get_gistic_arm_values(data_path, cancer)
    chromosomal_instability = arm_cn.abs().mean()

    cna_df = {'gene_amp': amp_gene_all, 'gene_amp_high': amp_gene_high,
              'gene_del': del_gene_all, 'gene_del_homo': del_gene_homo,
              'lesion_amp': amp_lesion_all, 'lesion_amp_high': amp_lesion_high,
              'lesion_del': del_lesion_all, 'lesion_del_homo': del_lesion_homo,
              'chrom_instability': chromosomal_instability}
    cna_df = pd.DataFrame(cna_df)
    if patients is not None:
        # reindex (rather than .loc) tolerates requested labels that are
        # absent: they become all-NaN rows and are removed by dropna(),
        # which is what the old .ix[list] lookup did.
        cna_df = cna_df.reindex(patients).dropna()
    return cna_df
Beispiel #2
0
def get_gistic(data_path, cancer, filter_with_rna=True,
               collapse_on_bands=True, min_patients=5):
    """
    Get the combined GISTIC feature matrix for testing.

    Loads the lesion table via FH.get_gistic_lesions and the gene-level
    table via get_gistic_genes (which receives filter_with_rna,
    collapse_on_bands and min_patients unchanged), and returns the gene
    rows followed by the lesion rows in a single DataFrame.
    """
    # Local import so this block does not depend on a module-level alias.
    import pandas as pd

    # NOTE(review): the original call passed (cancer, data_path); the order
    # is aligned here with get_gistic_genes below. Confirm against the
    # signature of FH.get_gistic_lesions.
    lesions = FH.get_gistic_lesions(data_path, cancer)
    cna_genes = get_gistic_genes(data_path, cancer, filter_with_rna,
                                 collapse_on_bands, min_patients)
    # DataFrame.append was removed in pandas 2.0; concat stacks the rows
    # in the same order.
    cna = pd.concat([cna_genes, lesions])
    return cna
Beispiel #3
0
def get_gistic(data_path,
               cancer,
               filter_with_rna=True,
               collapse_on_bands=True,
               min_patients=5):
    '''
    Get the combined GISTIC feature matrix for testing.

    Loads the lesion table via FH.get_gistic_lesions and the gene-level
    table via get_gistic_genes (which receives filter_with_rna,
    collapse_on_bands and min_patients unchanged), and returns the gene
    rows followed by the lesion rows in a single DataFrame.
    '''
    # Local import so this block does not depend on a module-level alias.
    import pandas as pd

    # NOTE(review): the original call passed (cancer, data_path); the order
    # is aligned here with get_gistic_genes below. Confirm against the
    # signature of FH.get_gistic_lesions.
    lesions = FH.get_gistic_lesions(data_path, cancer)
    cna_genes = get_gistic_genes(data_path, cancer, filter_with_rna,
                                 collapse_on_bands, min_patients)
    # DataFrame.append was removed in pandas 2.0; concat stacks the rows
    # in the same order.
    cna = pd.concat([cna_genes, lesions])
    return cna
Beispiel #4
0
def get_cna_rates(data_path, cancer, patients=None):
    '''
    Get copy-number aberration rates from GISTIC processing pipeline.

    This function depends on the current Firehose output of this program
    as of July 2013.

    data_path and cancer are passed unchanged to the FH.get_gistic_*
    loaders.  If patients is given, the result is restricted to those row
    labels; labels that are not present, and rows with a missing value in
    any column, are dropped.

    Returns a DataFrame with one column per rate:
      gene_amp / gene_amp_high      counts of gene values >= 1 / == 2
      gene_del / gene_del_homo      counts of gene values <= -1 / <= -2
      lesion_amp / lesion_amp_high  counts of 'Amplification' values >= 1 / == 2
      lesion_del / lesion_del_homo  counts of 'Deletion' values <= -1 / == -2
      chrom_instability             mean absolute arm-level value
    Each statistic is reduced column-wise over its source table
    (presumably one column per patient -- confirm against the FH loaders).
    '''
    gistic = FH.get_gistic_gene_matrix(data_path, cancer)
    amp_gene_all = (gistic >= 1).astype(int).sum()
    amp_gene_high = (gistic == 2).astype(int).sum()
    del_gene_all = (gistic <= -1).astype(int).sum()
    del_gene_homo = (gistic <= -2).astype(int).sum()

    # .ix was removed in pandas 1.0; these are pure label lookups, so .loc
    # is the direct replacement.
    lesions = FH.get_gistic_lesions(data_path, cancer)
    amplifications = lesions.loc['Amplification']
    deletions = lesions.loc['Deletion']
    amp_lesion_all = (amplifications >= 1).sum()
    amp_lesion_high = (amplifications == 2).sum()
    del_lesion_all = (deletions <= -1).sum()
    del_lesion_homo = (deletions == -2).sum()

    arm_cn = FH.get_gistic_arm_values(data_path, cancer)
    chromosomal_instability = arm_cn.abs().mean()

    cna_df = {
        'gene_amp': amp_gene_all,
        'gene_amp_high': amp_gene_high,
        'gene_del': del_gene_all,
        'gene_del_homo': del_gene_homo,
        'lesion_amp': amp_lesion_all,
        'lesion_amp_high': amp_lesion_high,
        'lesion_del': del_lesion_all,
        'lesion_del_homo': del_lesion_homo,
        'chrom_instability': chromosomal_instability
    }
    cna_df = pd.DataFrame(cna_df)
    if patients is not None:
        # reindex (rather than .loc) tolerates requested labels that are
        # absent: they become all-NaN rows and are removed by dropna(),
        # which is what the old .ix[list] lookup did.
        cna_df = cna_df.reindex(patients).dropna()
    return cna_df