Esempio n. 1
0
def get_gistic_genes(data_path, cancer, filter_with_rna=True,
                     collapse_on_bands=True, min_patients=5):
    """
    Gets a matrix of events for high grade amplifications and homozygous
    deletions.

    Rows of the GISTIC gene matrix are kept as deletions (value -2) or
    amplifications (value 2) when that value occurs in strictly more than
    `min_patients` columns of the row.
    We filter down this list by asserting that a copy number event corresponds
    with a resultant expression change (only when `filter_with_rna` is set).
    The final matrix merges gene-level events on the same band to combine
    redundant events and reduce the test space (only when `collapse_on_bands`
    is set).

    data_path, cancer: passed straight through to the FH reader functions.
    filter_with_rna: if true, run both event tables through rna_filter
        against the RNASeq data.
    collapse_on_bands: if false, return the per-row events without merging.
    min_patients: a row needs more than this many events to be retained.

    Returns a DataFrame indexed by (event type, s[0], s[2]) of the original
    index entries, or, when collapsed, by (event type, s[0], tuple of the
    third-level labels that were merged) holding the rounded mean of each
    group.
    """
    # NOTE(review): '01' is handed to the reader unchanged; presumably a
    # sample-type code -- confirm against FH.get_gistic_gene_matrix.
    gistic = FH.get_gistic_gene_matrix(data_path, cancer, '01')
    deletion = gistic[(gistic == -2).sum(1) > min_patients]
    amp = gistic[(gistic == 2).sum(1) > min_patients]
    ft = pd.MultiIndex.from_tuples  # ridiculously long pandas names
    # Re-label each row with its event type plus the first and third entries
    # of its original index tuple (presumably band and gene -- TODO confirm).
    deletion.index = ft([('Deletion', s[0], s[2]) for s in deletion.index])
    amp.index = ft([('Amplification', s[0], s[2]) for s in amp.index])

    if filter_with_rna:
        rna = FH.read_rnaSeq(data_path, cancer)
        deletion = rna_filter(deletion, -2, rna)
        amp = rna_filter(amp, 2, rna)

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows
    # in the same order and works on every pandas version.
    cna_genes = pd.concat([amp, deletion])
    if not collapse_on_bands:
        return cna_genes

    # One output row per (event type, second index level) group: the key
    # records which third-level labels were merged, the value is the rounded
    # column-wise mean over the group's rows.
    cna_genes = pd.DataFrame({(a[0], a[1], tuple(b.index.get_level_values(2))):
                              b.mean().round() for a, b in
                              cna_genes.groupby(level=[0, 1])}).T
    cna_genes.index = pd.MultiIndex.from_tuples(cna_genes.index)
    return cna_genes
Esempio n. 2
0
def get_gistic_genes(data_path,
                     cancer,
                     filter_with_rna=True,
                     collapse_on_bands=True,
                     min_patients=5):
    '''
    Gets a matrix of events for high grade amplifications and homozygous
    deletions.

    A row of the GISTIC gene matrix is retained as a deletion (value -2) or
    an amplification (value 2) when that value appears in strictly more than
    `min_patients` of its columns.
    We filter down this list by asserting that a copy number event corresponds
    with a resultant expression change (skipped when `filter_with_rna` is
    false).
    The final matrix merges gene-level events on the same band to combine
    redundant events and reduce the test space (skipped when
    `collapse_on_bands` is false).

    data_path, cancer: forwarded to the FH reader functions.
    filter_with_rna: whether to pass the events through rna_filter.
    collapse_on_bands: whether to merge rows that share the first two index
        levels into one row.
    min_patients: exclusive lower bound on the number of events per row.

    Returns a DataFrame whose index is (event type, s[0], s[2]) taken from
    the original index entries; when collapsed the third level is instead the
    tuple of merged third-level labels and values are rounded group means.
    '''
    # NOTE(review): the meaning of '01' is defined by the reader; presumably
    # a sample-type code -- confirm.
    gistic = FH.get_gistic_gene_matrix(data_path, cancer, '01')
    deletion = gistic[(gistic == -2).sum(1) > min_patients]
    amp = gistic[(gistic == 2).sum(1) > min_patients]
    ft = pd.MultiIndex.from_tuples  # ridiculously long pandas names
    # Tag every row with its event type and keep entries 0 and 2 of the
    # original index tuple (presumably band and gene -- TODO confirm).
    deletion.index = ft([('Deletion', s[0], s[2]) for s in deletion.index])
    amp.index = ft([('Amplification', s[0], s[2]) for s in amp.index])

    if filter_with_rna:
        rna = FH.read_rnaSeq(data_path, cancer)
        deletion = rna_filter(deletion, -2, rna)
        amp = rna_filter(amp, 2, rna)

    # pd.concat replaces DataFrame.append, which no longer exists as of
    # pandas 2.0; row order (amplifications, then deletions) is unchanged.
    cna_genes = pd.concat([amp, deletion])
    if not collapse_on_bands:
        return cna_genes

    # Merge rows sharing (event type, band) into a single row whose key also
    # lists the third-level labels that were combined.
    collapsed = {}
    for (event_type, band), members in cna_genes.groupby(level=[0, 1]):
        merged = tuple(members.index.get_level_values(2))
        collapsed[(event_type, band, merged)] = members.mean().round()
    cna_genes = pd.DataFrame(collapsed).T
    cna_genes.index = pd.MultiIndex.from_tuples(cna_genes.index)
    return cna_genes
Esempio n. 3
0
def get_global_vars(data_path, cancer, patients=None):
    """
    Get compiled DataFrame of global molecular variables from Firehose
    data.  Returns a feature by patient DataFrame with (data-type, variable)
    on the columns and patient barcodes on the index.

    The expression and methylation components ('pc1', 'pc2', taken from the
    third output of frame_svd) are best-effort: if reading or decomposing
    either data type fails, an empty frame is used for it instead of
    raising.  CNA and mutation rates are required; errors from those readers
    propagate.  Columns that are entirely NaN are dropped from the result.

    data_path, cancer, patients: forwarded unchanged to the reader functions.
    """
    # `except Exception` (rather than a bare except) keeps the best-effort
    # behaviour without also swallowing KeyboardInterrupt / SystemExit.
    try:
        data_matrix = FH.read_rnaSeq(data_path, cancer, patients)
        _, _, vH = frame_svd(data_matrix)
        exp_pc = pd.DataFrame({'pc1': vH[0], 'pc2': vH[1]})
    except Exception:
        exp_pc = pd.DataFrame()

    try:
        data_matrix = read_methylation(data_path, cancer, patients)
        _, _, vH = frame_svd(data_matrix)
        meth_pc = pd.DataFrame({'pc1': vH[0], 'pc2': vH[1]})
    except Exception:
        meth_pc = pd.DataFrame()

    # TODO: the methylation age signal (get_age_signal) is currently
    # disabled.  The previous placeholder joined the string 'FAIL' onto
    # meth_pc, which always raised and was silently ignored, so it never
    # changed the result; re-enable by joining the real signals here.

    cna_rates = get_cna_rates(data_path, cancer, patients)
    mutation_rates = get_mutation_rates(data_path, cancer, patients)

    # `keys` adds the data type as the outer level of the column index.
    gv = pd.concat([exp_pc, meth_pc, cna_rates, mutation_rates],
                   keys=['mRNASeq', 'methylation', 'cna', 'mutation'], axis=1)
    gv = gv.dropna(how='all', axis=1)
    return gv
Esempio n. 4
0
def get_global_vars(data_path, cancer, patients=None):
    '''
    Get compiled DataFrame of global molecular variables from Firehose
    data.  Returns a feature by patient DataFrame with (data-type, variable)
    on the columns and patient barcodes on the index.

    Expression and methylation each contribute 'pc1' and 'pc2' columns built
    from the third output of frame_svd; if either cannot be read or
    decomposed it contributes an empty frame rather than raising.  Errors
    from the CNA and mutation rate readers are not suppressed.  Columns that
    are all-NaN are removed before returning.

    data_path, cancer, patients: forwarded unchanged to the reader functions.
    '''
    # The readers are wrapped in lambdas so that every lookup and call
    # happens inside the helper's try block.
    exp_pc = _first_two_components(
        lambda: FH.read_rnaSeq(data_path, cancer, patients))
    meth_pc = _first_two_components(
        lambda: read_methylation(data_path, cancer, patients))

    # TODO: the methylation age signal (get_age_signal) is currently
    # disabled.  The old placeholder joined the string 'FAIL' onto meth_pc,
    # which always raised and was silently ignored, so it had no effect.

    cna_rates = get_cna_rates(data_path, cancer, patients)
    mutation_rates = get_mutation_rates(data_path, cancer, patients)

    # `keys` adds the data type as the outer level of the column index.
    gv = pd.concat([exp_pc, meth_pc, cna_rates, mutation_rates],
                   keys=['mRNASeq', 'methylation', 'cna', 'mutation'],
                   axis=1)
    gv = gv.dropna(how='all', axis=1)
    return gv


def _first_two_components(load_matrix):
    '''
    Best-effort 'pc1'/'pc2' frame for the matrix returned by load_matrix().

    Calls load_matrix with no arguments, runs frame_svd on the result and
    returns entries 0 and 1 of its third output as columns 'pc1' and 'pc2'.
    Any ordinary exception along the way yields an empty DataFrame instead;
    KeyboardInterrupt and SystemExit are deliberately not caught.
    '''
    try:
        _, _, vH = frame_svd(load_matrix())
        return pd.DataFrame({'pc1': vH[0], 'pc2': vH[1]})
    except Exception:
        return pd.DataFrame()