def get_cna_rates(data_path, cancer, patients=None):
    """
    Get copy-number aberration rates from GISTIC processing pipeline.

    This function depends on the current Firehose output of this program
    as of July 2013.

    Three tables are loaded through the ``FH`` helpers (gene-level matrix,
    lesion table and arm-level values) and each one is reduced to a single
    value per column.  The columns are presumably patients -- the
    ``patients`` filter below is applied to them -- but confirm against
    the ``FH`` loaders.

    :param data_path: forwarded unchanged to the ``FH`` loaders.
    :param cancer: forwarded unchanged to the ``FH`` loaders.
    :param patients: optional collection of row labels to restrict the
        result to.  Labels missing from the result, and rows with any
        missing rate, are dropped.
    :returns: ``pd.DataFrame`` with the columns ``gene_amp``,
        ``gene_amp_high``, ``gene_del``, ``gene_del_homo``, ``lesion_amp``,
        ``lesion_amp_high``, ``lesion_del``, ``lesion_del_homo`` and
        ``chrom_instability``.
    """
    # Gene-level calls: count, per column, the genes passing each threshold.
    gistic = FH.get_gistic_gene_matrix(data_path, cancer)
    amp_gene_all = (gistic >= 1).astype(int).sum()
    amp_gene_high = (gistic == 2).astype(int).sum()
    del_gene_all = (gistic <= -1).astype(int).sum()
    del_gene_homo = (gistic <= -2).astype(int).sum()

    # Lesion-level calls.  ``.loc`` replaces ``.ix`` (removed in pandas 1.0);
    # both select the rows labelled 'Amplification' / 'Deletion'.
    lesions = FH.get_gistic_lesions(data_path, cancer)
    amplifications = lesions.loc['Amplification']
    deletions = lesions.loc['Deletion']
    amp_lesion_all = (amplifications >= 1).sum()
    amp_lesion_high = (amplifications == 2).sum()
    del_lesion_all = (deletions <= -1).sum()
    del_lesion_homo = (deletions == -2).sum()

    # Mean absolute arm-level value per column.
    arm_cn = FH.get_gistic_arm_values(data_path, cancer)
    chromosomal_instability = arm_cn.abs().mean()

    cna_df = pd.DataFrame({
        'gene_amp': amp_gene_all,
        'gene_amp_high': amp_gene_high,
        'gene_del': del_gene_all,
        'gene_del_homo': del_gene_homo,
        'lesion_amp': amp_lesion_all,
        'lesion_amp_high': amp_lesion_high,
        'lesion_del': del_lesion_all,
        'lesion_del_homo': del_lesion_homo,
        'chrom_instability': chromosomal_instability,
    })
    if patients is not None:
        # ``reindex`` reproduces what ``.ix[patients]`` used to do: unknown
        # labels become all-NaN rows, which ``dropna`` then discards.
        # ``.loc[patients]`` would raise KeyError on them instead.
        cna_df = cna_df.reindex(patients).dropna()
    return cna_df
def get_gistic(data_path, cancer, filter_with_rna=True,
               collapse_on_bands=True, min_patients=5):
    """
    Get the combined GISTIC feature matrix for testing.

    The gene-level features from ``get_gistic_genes`` are stacked on top of
    the lesion table from ``FH.get_gistic_lesions``.

    :param data_path: forwarded to the loaders.
    :param cancer: forwarded to the loaders.
    :param filter_with_rna: forwarded to ``get_gistic_genes``.
    :param collapse_on_bands: forwarded to ``get_gistic_genes``.
    :param min_patients: forwarded to ``get_gistic_genes``.
    :returns: the gene-level rows followed by the lesion rows.
    """
    # The original call passed (cancer, data_path); every other loader call
    # in this module uses (data_path, cancer), so the order is aligned here.
    # NOTE(review): confirm against the signature of FH.get_gistic_lesions.
    lesions = FH.get_gistic_lesions(data_path, cancer)
    cna_genes = get_gistic_genes(data_path, cancer, filter_with_rna,
                                 collapse_on_bands, min_patients)
    # DataFrame.append was removed in pandas 2.0; pd.concat is its
    # replacement and keeps the row order (genes first, then lesions).
    cna = pd.concat([cna_genes, lesions])
    return cna
def get_gistic(data_path, cancer, filter_with_rna=True,
               collapse_on_bands=True, min_patients=5):
    '''
    Get the combined GISTIC feature matrix for testing.

    The gene-level features from ``get_gistic_genes`` are stacked on top of
    the lesion table from ``FH.get_gistic_lesions``.

    :param data_path: forwarded to the loaders.
    :param cancer: forwarded to the loaders.
    :param filter_with_rna: forwarded to ``get_gistic_genes``.
    :param collapse_on_bands: forwarded to ``get_gistic_genes``.
    :param min_patients: forwarded to ``get_gistic_genes``.
    :returns: the gene-level rows followed by the lesion rows.
    '''
    # The original call passed (cancer, data_path); every other loader call
    # in this module uses (data_path, cancer), so the order is aligned here.
    # NOTE(review): confirm against the signature of FH.get_gistic_lesions.
    lesions = FH.get_gistic_lesions(data_path, cancer)
    cna_genes = get_gistic_genes(data_path, cancer, filter_with_rna,
                                 collapse_on_bands, min_patients)
    # DataFrame.append was removed in pandas 2.0; pd.concat is its
    # replacement and keeps the row order (genes first, then lesions).
    cna = pd.concat([cna_genes, lesions])
    return cna
def get_cna_rates(data_path, cancer, patients=None):
    '''
    Get copy-number aberration rates from GISTIC processing pipeline.

    This function depends on the current Firehose output of this program
    as of July 2013.

    Three tables are loaded through the ``FH`` helpers (gene-level matrix,
    lesion table and arm-level values) and each one is reduced to a single
    value per column.  The columns are presumably patients -- the
    ``patients`` filter below is applied to them -- but confirm against
    the ``FH`` loaders.

    :param data_path: forwarded unchanged to the ``FH`` loaders.
    :param cancer: forwarded unchanged to the ``FH`` loaders.
    :param patients: optional collection of row labels to restrict the
        result to.  Labels missing from the result, and rows with any
        missing rate, are dropped.
    :returns: ``pd.DataFrame`` with the columns ``gene_amp``,
        ``gene_amp_high``, ``gene_del``, ``gene_del_homo``, ``lesion_amp``,
        ``lesion_amp_high``, ``lesion_del``, ``lesion_del_homo`` and
        ``chrom_instability``.
    '''
    # Gene-level calls: count, per column, the genes passing each threshold.
    gistic = FH.get_gistic_gene_matrix(data_path, cancer)
    amp_gene_all = (gistic >= 1).astype(int).sum()
    amp_gene_high = (gistic == 2).astype(int).sum()
    del_gene_all = (gistic <= -1).astype(int).sum()
    del_gene_homo = (gistic <= -2).astype(int).sum()

    # Lesion-level calls.  ``.loc`` replaces ``.ix`` (removed in pandas 1.0);
    # both select the rows labelled 'Amplification' / 'Deletion'.
    lesions = FH.get_gistic_lesions(data_path, cancer)
    amplifications = lesions.loc['Amplification']
    deletions = lesions.loc['Deletion']
    amp_lesion_all = (amplifications >= 1).sum()
    amp_lesion_high = (amplifications == 2).sum()
    del_lesion_all = (deletions <= -1).sum()
    del_lesion_homo = (deletions == -2).sum()

    # Mean absolute arm-level value per column.
    arm_cn = FH.get_gistic_arm_values(data_path, cancer)
    chromosomal_instability = arm_cn.abs().mean()

    cna_df = pd.DataFrame({
        'gene_amp': amp_gene_all,
        'gene_amp_high': amp_gene_high,
        'gene_del': del_gene_all,
        'gene_del_homo': del_gene_homo,
        'lesion_amp': amp_lesion_all,
        'lesion_amp_high': amp_lesion_high,
        'lesion_del': del_lesion_all,
        'lesion_del_homo': del_lesion_homo,
        'chrom_instability': chromosomal_instability,
    })
    if patients is not None:
        # ``reindex`` reproduces what ``.ix[patients]`` used to do: unknown
        # labels become all-NaN rows, which ``dropna`` then discards.
        # ``.loc[patients]`` would raise KeyError on them instead.
        cna_df = cna_df.reindex(patients).dropna()
    return cna_df