def get_gistic_genes(data_path, cancer, filter_with_rna=True,
                     collapse_on_bands=True, min_patients=5):
    """
    Get a matrix of high-grade amplification and homozygous deletion events.

    A row of the GISTIC gene matrix is kept as a candidate event when
    strictly more than ``min_patients`` samples carry a call of 2
    (amplification) or -2 (deletion).  Candidates can then be filtered by
    asserting that the copy number event corresponds with a resultant
    expression change, and gene-level events on the same band can be merged
    to combine redundant events and reduce the test space.

    Parameters
    ----------
    data_path, cancer
        Passed through to the ``FH`` readers for GISTIC and RNA-seq data.
    filter_with_rna : bool
        When true, both event sets are passed through ``rna_filter``
        together with the RNA-seq matrix.
    collapse_on_bands : bool
        When false, the gene-level matrix is returned as is.  When true,
        rows sharing the first two index levels are merged into one row.
    min_patients : int
        Threshold on the number of samples carrying the event.
        NOTE(review): the comparison is ``>``, so a row needs at least
        ``min_patients + 1`` hits -- confirm this is intended.

    Returns
    -------
    pandas.DataFrame
        Amplification rows followed by deletion rows, indexed by
        (event type, s[0], s[2]) of the original GISTIC index.  After
        collapsing, the third index level is a tuple of the merged
        third-level labels and each value is the rounded mean of the
        merged rows.
    """
    gistic = FH.get_gistic_gene_matrix(data_path, cancer, '01')
    deletion = gistic[(gistic == -2).sum(1) > min_patients]
    amp = gistic[(gistic == 2).sum(1) > min_patients]

    # Re-key every row as (event type, s[0], s[2]); presumably these are the
    # cytoband and the gene identifier -- verify against the FH reader.
    deletion.index = pd.MultiIndex.from_tuples(
        [('Deletion', s[0], s[2]) for s in deletion.index])
    amp.index = pd.MultiIndex.from_tuples(
        [('Amplification', s[0], s[2]) for s in amp.index])

    if filter_with_rna:
        rna = FH.read_rnaSeq(data_path, cancer)
        deletion = rna_filter(deletion, -2, rna)
        amp = rna_filter(amp, 2, rna)

    # pd.concat replaces DataFrame.append, which newer pandas no longer has.
    cna_genes = pd.concat([amp, deletion])
    if not collapse_on_bands:
        return cna_genes

    # Merge all rows sharing (event type, second level) into a single row,
    # remembering which third-level labels were folded together.
    collapsed = {}
    for (event, band), rows in cna_genes.groupby(level=[0, 1]):
        members = tuple(rows.index.get_level_values(2))
        collapsed[(event, band, members)] = rows.mean().round()

    cna_genes = pd.DataFrame(collapsed).T
    cna_genes.index = pd.MultiIndex.from_tuples(cna_genes.index)
    return cna_genes
def get_gistic_genes(data_path, cancer, filter_with_rna=True,
                     collapse_on_bands=True, min_patients=5):
    """
    Get a matrix of high-grade amplification and homozygous deletion events.

    A row of the GISTIC gene matrix is kept as a candidate event when
    strictly more than ``min_patients`` samples carry a call of 2
    (amplification) or -2 (deletion).  Candidates can then be filtered by
    asserting that the copy number event corresponds with a resultant
    expression change, and gene-level events on the same band can be merged
    to combine redundant events and reduce the test space.

    Parameters
    ----------
    data_path, cancer
        Passed through to the ``FH`` readers for GISTIC and RNA-seq data.
    filter_with_rna : bool
        When true, both event sets are passed through ``rna_filter``
        together with the RNA-seq matrix.
    collapse_on_bands : bool
        When false, the gene-level matrix is returned as is.  When true,
        rows sharing the first two index levels are merged into one row.
    min_patients : int
        Threshold on the number of samples carrying the event.
        NOTE(review): the comparison is ``>``, so a row needs at least
        ``min_patients + 1`` hits -- confirm this is intended.

    Returns
    -------
    pandas.DataFrame
        Amplification rows followed by deletion rows, indexed by
        (event type, s[0], s[2]) of the original GISTIC index.  After
        collapsing, the third index level is a tuple of the merged
        third-level labels and each value is the rounded mean of the
        merged rows.
    """
    gistic = FH.get_gistic_gene_matrix(data_path, cancer, '01')
    deletion = gistic[(gistic == -2).sum(1) > min_patients]
    amp = gistic[(gistic == 2).sum(1) > min_patients]

    # Re-key every row as (event type, s[0], s[2]); presumably these are the
    # cytoband and the gene identifier -- verify against the FH reader.
    deletion.index = pd.MultiIndex.from_tuples(
        [('Deletion', s[0], s[2]) for s in deletion.index])
    amp.index = pd.MultiIndex.from_tuples(
        [('Amplification', s[0], s[2]) for s in amp.index])

    if filter_with_rna:
        rna = FH.read_rnaSeq(data_path, cancer)
        deletion = rna_filter(deletion, -2, rna)
        amp = rna_filter(amp, 2, rna)

    # pd.concat replaces DataFrame.append, which newer pandas no longer has.
    cna_genes = pd.concat([amp, deletion])
    if not collapse_on_bands:
        return cna_genes

    # Merge all rows sharing (event type, second level) into a single row,
    # remembering which third-level labels were folded together.
    collapsed = {}
    for (event, band), rows in cna_genes.groupby(level=[0, 1]):
        members = tuple(rows.index.get_level_values(2))
        collapsed[(event, band, members)] = rows.mean().round()

    cna_genes = pd.DataFrame(collapsed).T
    cna_genes.index = pd.MultiIndex.from_tuples(cna_genes.index)
    return cna_genes
def get_global_vars(data_path, cancer, patients=None):
    """
    Get compiled DataFrame of global molecular variables from Firehose data.

    Four groups of variables are concatenated column-wise under the keys
    'mRNASeq', 'methylation', 'cna' and 'mutation':

    * 'pc1' / 'pc2' taken from the third output of ``frame_svd`` on the
      RNA-seq matrix,
    * 'pc1' / 'pc2' taken the same way from the methylation matrix,
    * the output of ``get_cna_rates``,
    * the output of ``get_mutation_rates``.

    The expression and methylation groups are best-effort: if reading or
    decomposing either matrix fails, that group is left empty instead of
    aborting the whole call.  Errors from the CNA and mutation rate helpers
    are not caught.

    Parameters
    ----------
    data_path, cancer, patients
        Passed through unchanged to the underlying readers.

    Returns
    -------
    pandas.DataFrame
        (data-type, variable) on the columns and patient barcodes on the
        index, with all-NaN columns dropped.
    """
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        data_matrix = FH.read_rnaSeq(data_path, cancer, patients)
        _, _, vH = frame_svd(data_matrix)
        exp_pc = pd.DataFrame({'pc1': vH[0], 'pc2': vH[1]})
    except Exception:
        exp_pc = pd.DataFrame()

    try:
        data_matrix = read_methylation(data_path, cancer, patients)
        _, _, vH = frame_svd(data_matrix)
        meth_pc = pd.DataFrame({'pc1': vH[0], 'pc2': vH[1]})
    except Exception:
        meth_pc = pd.DataFrame()

    # TODO: the methylation age signal (meth_age / amar, formerly obtained
    # from get_age_signal(data_path, cancer)) is disabled.  The placeholder
    # that stood here joined the literal string 'FAIL' onto meth_pc, which
    # always raised and was silently swallowed, so it never contributed
    # anything.  Should probably check this out before re-enabling.

    cna_rates = get_cna_rates(data_path, cancer, patients)
    mutation_rates = get_mutation_rates(data_path, cancer, patients)

    gv = pd.concat([exp_pc, meth_pc, cna_rates, mutation_rates],
                   keys=['mRNASeq', 'methylation', 'cna', 'mutation'],
                   axis=1)
    gv = gv.dropna(how='all', axis=1)
    return gv
def get_global_vars(data_path, cancer, patients=None):
    """
    Get compiled DataFrame of global molecular variables from Firehose data.

    Four groups of variables are concatenated column-wise under the keys
    'mRNASeq', 'methylation', 'cna' and 'mutation':

    * 'pc1' / 'pc2' taken from the third output of ``frame_svd`` on the
      RNA-seq matrix,
    * 'pc1' / 'pc2' taken the same way from the methylation matrix,
    * the output of ``get_cna_rates``,
    * the output of ``get_mutation_rates``.

    The expression and methylation groups are best-effort: if reading or
    decomposing either matrix fails, that group is left empty instead of
    aborting the whole call.  Errors from the CNA and mutation rate helpers
    are not caught.

    Parameters
    ----------
    data_path, cancer, patients
        Passed through unchanged to the underlying readers.

    Returns
    -------
    pandas.DataFrame
        (data-type, variable) on the columns and patient barcodes on the
        index, with all-NaN columns dropped.
    """
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        data_matrix = FH.read_rnaSeq(data_path, cancer, patients)
        _, _, vH = frame_svd(data_matrix)
        exp_pc = pd.DataFrame({'pc1': vH[0], 'pc2': vH[1]})
    except Exception:
        exp_pc = pd.DataFrame()

    try:
        data_matrix = read_methylation(data_path, cancer, patients)
        _, _, vH = frame_svd(data_matrix)
        meth_pc = pd.DataFrame({'pc1': vH[0], 'pc2': vH[1]})
    except Exception:
        meth_pc = pd.DataFrame()

    # TODO: the methylation age signal (meth_age / amar, formerly obtained
    # from get_age_signal(data_path, cancer)) is disabled.  The placeholder
    # that stood here joined the literal string 'FAIL' onto meth_pc, which
    # always raised and was silently swallowed, so it never contributed
    # anything.  Should probably check this out before re-enabling.

    cna_rates = get_cna_rates(data_path, cancer, patients)
    mutation_rates = get_mutation_rates(data_path, cancer, patients)

    gv = pd.concat([exp_pc, meth_pc, cna_rates, mutation_rates],
                   keys=['mRNASeq', 'methylation', 'cna', 'mutation'],
                   axis=1)
    gv = gv.dropna(how='all', axis=1)
    return gv