def get_gistic_genes(data_path, cancer, filter_with_rna=True,
                     collapse_on_bands=True, min_patients=5):
    """
    Gets a matrix of events for high grade amplifications and homozygous
    deletions.

    We filter down this list by asserting that a copy number event
    corresponds with a resultant expression change. The final matrix merges
    gene-level events on the same band to combine redundant events and
    reduce the test space.

    data_path, cancer:
        Passed through unchanged to the `FH` reader functions.
    filter_with_rna:
        When true, RNA-seq data is read with `FH.read_rnaSeq` and both event
        tables are passed through `rna_filter`.
    collapse_on_bands:
        When false, the un-merged per-row event table is returned. When
        true, rows sharing the first two index levels are merged into one.
    min_patients:
        A row is kept only if strictly more than this many columns carry
        the event call (2 for amplification, -2 for deletion).

    Returns a DataFrame with a three-level row index whose first level is
    'Amplification' or 'Deletion'. In the collapsed form the third level is
    a tuple of the merged rows' third-level labels and the values are the
    rounded mean of the merged rows.
    """
    gistic = FH.get_gistic_gene_matrix(data_path, cancer, '01')

    # Row-wise count of columns carrying the event; strict '>' threshold.
    deletion = gistic[(gistic == -2).sum(1) > min_patients]
    amp = gistic[(gistic == 2).sum(1) > min_patients]

    # Re-key each row as (event type, s[0], s[2]) of the original index
    # tuple -- presumably (event, band, gene); confirm against the index
    # produced by FH.get_gistic_gene_matrix.
    ft = pd.MultiIndex.from_tuples  # ridiculously long pandas names
    deletion.index = ft([('Deletion', s[0], s[2]) for s in deletion.index])
    amp.index = ft([('Amplification', s[0], s[2]) for s in amp.index])

    if filter_with_rna:
        rna = FH.read_rnaSeq(data_path, cancer)
        deletion = rna_filter(deletion, -2, rna)
        amp = rna_filter(amp, 2, rna)

    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    cna_genes = pd.concat([amp, deletion])
    if not collapse_on_bands:
        return cna_genes

    # Merge rows that share (event type, second index level): the new key
    # carries the tuple of merged third-level labels, the value is the
    # rounded column-wise mean of the merged rows.
    collapsed = {}
    for key, members in cna_genes.groupby(level=[0, 1]):
        merged_labels = tuple(members.index.get_level_values(2))
        collapsed[(key[0], key[1], merged_labels)] = members.mean().round()

    cna_genes = pd.DataFrame(collapsed).T
    cna_genes.index = pd.MultiIndex.from_tuples(cna_genes.index)
    return cna_genes
def get_gistic_genes(data_path, cancer, filter_with_rna=True,
                     collapse_on_bands=True, min_patients=5):
    """
    Gets a matrix of events for high grade amplifications and homozygous
    deletions.

    We filter down this list by asserting that a copy number event
    corresponds with a resultant expression change. The final matrix merges
    gene-level events on the same band to combine redundant events and
    reduce the test space.

    data_path, cancer:
        Passed through unchanged to the `FH` reader functions.
    filter_with_rna:
        When true, RNA-seq data is read with `FH.read_rnaSeq` and both event
        tables are passed through `rna_filter`.
    collapse_on_bands:
        When false, the un-merged per-row event table is returned. When
        true, rows sharing the first two index levels are merged into one.
    min_patients:
        A row is kept only if strictly more than this many columns carry
        the event call (2 for amplification, -2 for deletion).

    Returns a DataFrame with a three-level row index whose first level is
    'Amplification' or 'Deletion'. In the collapsed form the third level is
    a tuple of the merged rows' third-level labels and the values are the
    rounded mean of the merged rows.
    """
    gistic = FH.get_gistic_gene_matrix(data_path, cancer, '01')

    # Row-wise count of columns carrying the event; strict '>' threshold.
    deletion = gistic[(gistic == -2).sum(1) > min_patients]
    amp = gistic[(gistic == 2).sum(1) > min_patients]

    # Re-key each row as (event type, s[0], s[2]) of the original index
    # tuple -- presumably (event, band, gene); confirm against the index
    # produced by FH.get_gistic_gene_matrix.
    ft = pd.MultiIndex.from_tuples  # ridiculously long pandas names
    deletion.index = ft([('Deletion', s[0], s[2]) for s in deletion.index])
    amp.index = ft([('Amplification', s[0], s[2]) for s in amp.index])

    if filter_with_rna:
        rna = FH.read_rnaSeq(data_path, cancer)
        deletion = rna_filter(deletion, -2, rna)
        amp = rna_filter(amp, 2, rna)

    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    cna_genes = pd.concat([amp, deletion])
    if not collapse_on_bands:
        return cna_genes

    # Merge rows that share (event type, second index level): the new key
    # carries the tuple of merged third-level labels, the value is the
    # rounded column-wise mean of the merged rows.
    collapsed = {}
    for key, members in cna_genes.groupby(level=[0, 1]):
        merged_labels = tuple(members.index.get_level_values(2))
        collapsed[(key[0], key[1], merged_labels)] = members.mean().round()

    cna_genes = pd.DataFrame(collapsed).T
    cna_genes.index = pd.MultiIndex.from_tuples(cna_genes.index)
    return cna_genes
def get_cna_rates(data_path, cancer, patients=None):
    """
    Get copy-number aberration rates from GISTIC processing pipeline.

    This function depends on the current Firehose output of this program
    as of July 2013.

    data_path, cancer:
        Passed through unchanged to the `FH` reader functions.
    patients:
        Optional list-like of row labels. When given, the result is
        restricted to these labels, in this order; labels that are absent
        or that have any missing value are dropped.

    Returns a DataFrame with one row per column label of the GISTIC tables
    (presumably patients -- confirm against the FH readers) and the columns
    'gene_amp', 'gene_amp_high', 'gene_del', 'gene_del_homo', 'lesion_amp',
    'lesion_amp_high', 'lesion_del', 'lesion_del_homo' (event counts) and
    'chrom_instability' (mean absolute arm-level value).
    """
    # Gene-level calls: count, per column, the rows at each call threshold.
    gistic = FH.get_gistic_gene_matrix(data_path, cancer)
    amp_gene_all = (gistic >= 1).astype(int).sum()
    amp_gene_high = (gistic == 2).astype(int).sum()
    del_gene_all = (gistic <= -1).astype(int).sum()
    del_gene_homo = (gistic <= -2).astype(int).sum()

    # Lesion-level calls, split by the 'Amplification' / 'Deletion' label.
    # `.ix` was removed in pandas 1.0; `.loc` is the label-based equivalent.
    lesions = FH.get_gistic_lesions(data_path, cancer)
    amp_lesions = lesions.loc['Amplification']
    del_lesions = lesions.loc['Deletion']
    amp_lesion_all = (amp_lesions >= 1).sum()
    amp_lesion_high = (amp_lesions == 2).sum()
    del_lesion_all = (del_lesions <= -1).sum()
    del_lesion_homo = (del_lesions == -2).sum()

    arm_cn = FH.get_gistic_arm_values(data_path, cancer)
    chromosomal_instability = arm_cn.abs().mean()

    cna_df = pd.DataFrame({
        'gene_amp': amp_gene_all,
        'gene_amp_high': amp_gene_high,
        'gene_del': del_gene_all,
        'gene_del_homo': del_gene_homo,
        'lesion_amp': amp_lesion_all,
        'lesion_amp_high': amp_lesion_high,
        'lesion_del': del_lesion_all,
        'lesion_del_homo': del_lesion_homo,
        'chrom_instability': chromosomal_instability,
    })

    if patients is not None:
        # reindex (not .loc) so unknown labels become NaN rows that dropna
        # removes, matching the old `.ix[patients]` behaviour instead of
        # raising KeyError.
        cna_df = cna_df.reindex(patients).dropna()
    return cna_df
def get_cna_rates(data_path, cancer, patients=None):
    """
    Get copy-number aberration rates from GISTIC processing pipeline.

    This function depends on the current Firehose output of this program
    as of July 2013.

    data_path, cancer:
        Passed through unchanged to the `FH` reader functions.
    patients:
        Optional list-like of row labels. When given, the result is
        restricted to these labels, in this order; labels that are absent
        or that have any missing value are dropped.

    Returns a DataFrame with one row per column label of the GISTIC tables
    (presumably patients -- confirm against the FH readers) and the columns
    'gene_amp', 'gene_amp_high', 'gene_del', 'gene_del_homo', 'lesion_amp',
    'lesion_amp_high', 'lesion_del', 'lesion_del_homo' (event counts) and
    'chrom_instability' (mean absolute arm-level value).
    """
    # Gene-level calls: count, per column, the rows at each call threshold.
    gistic = FH.get_gistic_gene_matrix(data_path, cancer)
    amp_gene_all = (gistic >= 1).astype(int).sum()
    amp_gene_high = (gistic == 2).astype(int).sum()
    del_gene_all = (gistic <= -1).astype(int).sum()
    del_gene_homo = (gistic <= -2).astype(int).sum()

    # Lesion-level calls, split by the 'Amplification' / 'Deletion' label.
    # `.ix` was removed in pandas 1.0; `.loc` is the label-based equivalent.
    lesions = FH.get_gistic_lesions(data_path, cancer)
    amp_lesions = lesions.loc['Amplification']
    del_lesions = lesions.loc['Deletion']
    amp_lesion_all = (amp_lesions >= 1).sum()
    amp_lesion_high = (amp_lesions == 2).sum()
    del_lesion_all = (del_lesions <= -1).sum()
    del_lesion_homo = (del_lesions == -2).sum()

    arm_cn = FH.get_gistic_arm_values(data_path, cancer)
    chromosomal_instability = arm_cn.abs().mean()

    cna_df = pd.DataFrame({
        'gene_amp': amp_gene_all,
        'gene_amp_high': amp_gene_high,
        'gene_del': del_gene_all,
        'gene_del_homo': del_gene_homo,
        'lesion_amp': amp_lesion_all,
        'lesion_amp_high': amp_lesion_high,
        'lesion_del': del_lesion_all,
        'lesion_del_homo': del_lesion_homo,
        'chrom_instability': chromosomal_instability,
    })

    if patients is not None:
        # reindex (not .loc) so unknown labels become NaN rows that dropna
        # removes, matching the old `.ix[patients]` behaviour instead of
        # raising KeyError.
        cna_df = cna_df.reindex(patients).dropna()
    return cna_df