def read_het_file(self):
    """Load the tumor and normal het tables and merge them into ``self.het_table``.

    Both ``self.tumor_het_file`` and ``self.normal_het_file`` are read as
    tab-separated files, skipping lines that start with ``#``. Each table
    then gets three derived columns:

    * ``Chromosome`` -- ``du.chr2num`` applied to the ``CONTIG`` column;
      rows whose value is not finite are dropped.
    * ``genomic_coord_x`` -- ``du.hg19_to_linear_positions`` applied to
      ``Chromosome`` and ``POSITION`` (presumably a genome-wide linear
      coordinate; confirm against the helper).
    * ``AF`` -- ``ALT_COUNT / (ALT_COUNT + REF_COUNT)``.

    The two tables are joined on ``genomic_coord_x`` (pandas' default inner
    join), so only positions present in both survive. Overlapping column
    names receive the suffix ``_N`` (normal) or ``_T`` (tumor).

    Returns:
        None. The merged table is stored on ``self.het_table``.
    """

    def load_het_table(path):
        # Read the column named by the first header entry as str so pandas
        # does not infer a numeric dtype for it.
        header = du.read_file_header(path)
        # ``sep`` must be passed by keyword: pandas >= 2.0 rejects every
        # positional argument to read_csv after the file path.
        table = pd.read_csv(path, sep='\t', index_col=False,
                            low_memory=False, comment='#',
                            dtype={header[0]: str})
        table = du.fix_het_file_header(table)
        table['Chromosome'] = du.chr2num(np.array(table['CONTIG']))
        # Copy the filtered rows so the column assignments below write to an
        # independent frame instead of a slice (avoids SettingWithCopyWarning).
        table = table[np.isfinite(table['Chromosome'])].copy()
        table['genomic_coord_x'] = du.hg19_to_linear_positions(
            np.array(table['Chromosome']), np.array(table['POSITION']))
        table['AF'] = np.true_divide(
            table['ALT_COUNT'], table['ALT_COUNT'] + table['REF_COUNT'])
        return table

    tumor_het_table = load_het_table(self.tumor_het_file)
    normal_het_table = load_het_table(self.normal_het_file)
    self.het_table = pd.merge(normal_het_table, tumor_het_table,
                              on='genomic_coord_x', suffixes=('_N', '_T'))
def read_seg_file(self):
    """Populate ``self.seg_table`` from ``self.seg_file``.

    When ``self.seg_file`` is the literal string ``'NULL'`` no file is read.
    Instead ``self.seg_table`` and ``self.het_table`` are both replaced by
    single-row placeholder frames (index ``0``, every cell NaN) that carry
    the expected column names.

    Otherwise the file is read as tab-separated text, skipping lines that
    start with ``#``, and passed through ``du.fix_seg_file_header``. The
    ``Chromosome`` column is replaced by ``du.chr2num`` of itself, and
    ``genomic_coord_start`` / ``genomic_coord_end`` are added by applying
    ``du.hg19_to_linear_positions`` to ``Start.bp`` / ``End.bp``
    (presumably genome-wide linear coordinates; confirm against the helper).
    ``self.het_table`` is left untouched in this branch.

    Returns:
        None. Results are stored on ``self``.
    """
    if self.seg_file == 'NULL':
        self.seg_table = pd.DataFrame(
            index=[0],
            columns=['Chromosome', 'Start.bp', 'End.bp', 'n_probes',
                     'length', 'f', 'tau', 'genomic_coord_start',
                     'genomic_coord_end'])
        # 'ALT_COUNT_N' and 'ALT_COUNT_T' must be separate entries; without
        # the comma Python concatenates the adjacent literals into a single
        # 'ALT_COUNT_NALT_COUNT_T' column.
        self.het_table = pd.DataFrame(
            index=[0],
            columns=['seg_id', 'tau', 'f', 'd', 'AF_T', 'AF_N',
                     'Chromosome', 'genomic_coord_x', 'ALT_COUNT_N',
                     'ALT_COUNT_T', 'REF_COUNT_N', 'REF_COUNT_T'])
        return

    # Read the column named by the first header entry as str so pandas does
    # not infer a numeric dtype for it.
    seg_header = du.read_file_header(self.seg_file)
    # ``sep`` must be passed by keyword: pandas >= 2.0 rejects every
    # positional argument to read_csv after the file path.
    seg_table = pd.read_csv(self.seg_file, sep='\t', index_col=False,
                            low_memory=False, comment='#',
                            dtype={seg_header[0]: str})
    seg_table = du.fix_seg_file_header(seg_table)
    seg_table['Chromosome'] = du.chr2num(np.array(seg_table['Chromosome']))
    chromosomes = np.array(seg_table['Chromosome'])
    seg_table['genomic_coord_start'] = du.hg19_to_linear_positions(
        chromosomes, np.array(seg_table['Start.bp']))
    seg_table['genomic_coord_end'] = du.hg19_to_linear_positions(
        chromosomes, np.array(seg_table['End.bp']))
    self.seg_table = seg_table