Example #1
0
 def read_het_file(self):
     """Load the tumor and normal het tables and merge them into ``self.het_table``.

     Reads ``self.tumor_het_file`` and ``self.normal_het_file`` as
     tab-separated files (lines starting with ``#`` are treated as comments),
     applies the same preparation to each, and inner-joins them on
     ``genomic_coord_x``. Columns present in both tables get the suffixes
     ``_N`` (normal) and ``_T`` (tumor).

     After ``du.fix_het_file_header`` each table is expected to provide the
     columns ``CONTIG``, ``POSITION``, ``ALT_COUNT`` and ``REF_COUNT``.

     Returns:
         None. The merged table is stored on ``self.het_table``.
     """

     def load_het_table(het_file):
         # Shared preparation for one het file (tumor and normal are
         # processed identically).
         header = du.read_file_header(het_file)
         # `sep` must be passed by keyword: pandas >= 2.0 rejects positional
         # arguments after the file path.
         # The first column is forced to `str`
         # (presumably so contig labels are not partly parsed as numbers --
         # TODO confirm).
         table = pd.read_csv(het_file,
                             sep='\t',
                             index_col=False,
                             low_memory=False,
                             comment='#',
                             dtype={header[0]: str})
         table = du.fix_het_file_header(table)
         table['Chromosome'] = du.chr2num(np.array(table['CONTIG']))
         # Keep only rows whose chromosome value is finite. The explicit
         # copy makes the column assignments below act on an independent
         # frame instead of a filtered selection (avoids
         # SettingWithCopyWarning).
         table = table[np.isfinite(table['Chromosome'])].copy()
         table['genomic_coord_x'] = du.hg19_to_linear_positions(
             np.array(table['Chromosome']),
             np.array(table['POSITION']))
         # AF = ALT_COUNT / (ALT_COUNT + REF_COUNT).
         table['AF'] = np.true_divide(
             table['ALT_COUNT'],
             table['ALT_COUNT'] + table['REF_COUNT'])
         return table

     tumor_het_table = load_het_table(self.tumor_het_file)
     normal_het_table = load_het_table(self.normal_het_file)
     # pd.merge defaults to an inner join, so only coordinates present in
     # both tables are kept.
     self.het_table = pd.merge(normal_het_table,
                               tumor_het_table,
                               on='genomic_coord_x',
                               suffixes=('_N', '_T'))
Example #2
0
    def read_seg_file(self):
        """Load the segment table into ``self.seg_table``.

        When ``self.seg_file`` is the sentinel string ``'NULL'``, no file is
        read. Instead ``self.seg_table`` and ``self.het_table`` are both set
        to single-row placeholder frames (index ``[0]``, no values filled in)
        with the expected column names.

        Otherwise ``self.seg_file`` is read as a tab-separated file (lines
        starting with ``#`` are treated as comments), its header is passed
        through ``du.fix_seg_file_header``, the ``Chromosome`` column is
        converted with ``du.chr2num``, and ``genomic_coord_start`` /
        ``genomic_coord_end`` are derived from ``Start.bp`` / ``End.bp`` via
        ``du.hg19_to_linear_positions``. ``self.het_table`` is not touched in
        this branch.

        Returns:
            None. Results are stored on ``self``.
        """
        if self.seg_file == 'NULL':
            seg_columns = ['Chromosome', 'Start.bp', 'End.bp', 'n_probes',
                           'length', 'f', 'tau', 'genomic_coord_start',
                           'genomic_coord_end']
            # 'ALT_COUNT_N' and 'ALT_COUNT_T' must be separate entries; a
            # missing comma previously fused them into a single column named
            # 'ALT_COUNT_NALT_COUNT_T'.
            het_columns = ['seg_id', 'tau', 'f', 'd', 'AF_T', 'AF_N',
                           'Chromosome', 'genomic_coord_x',
                           'ALT_COUNT_N', 'ALT_COUNT_T',
                           'REF_COUNT_N', 'REF_COUNT_T']
            self.seg_table = pd.DataFrame(index=[0], columns=seg_columns)
            self.het_table = pd.DataFrame(index=[0], columns=het_columns)
            return

        seg_header = du.read_file_header(self.seg_file)
        # `sep` must be passed by keyword: pandas >= 2.0 rejects positional
        # arguments after the file path. The first column is forced to `str`.
        self.seg_table = pd.read_csv(self.seg_file,
                                     sep='\t',
                                     index_col=False,
                                     low_memory=False,
                                     comment='#',
                                     dtype={seg_header[0]: str})
        self.seg_table = du.fix_seg_file_header(self.seg_table)

        self.seg_table['Chromosome'] = du.chr2num(
            np.array(self.seg_table['Chromosome']))

        # Each call gets its own freshly built arrays, as in the original.
        self.seg_table['genomic_coord_start'] = du.hg19_to_linear_positions(
            np.array(self.seg_table['Chromosome']),
            np.array(self.seg_table['Start.bp']))
        self.seg_table['genomic_coord_end'] = du.hg19_to_linear_positions(
            np.array(self.seg_table['Chromosome']),
            np.array(self.seg_table['End.bp']))