def test_read_multiple_bams(bam_filenames, multiple_bams_reads_table_csvs,
                            ignore_multimapping):
    """Check ``read_multiple_bams`` against the expected reads tables.

    The expected table is built by reading every CSV in
    ``multiple_bams_reads_table_csvs`` and concatenating them. Both the
    observed and the expected tables are sorted by all of their columns
    and re-indexed before being compared, so only the contents matter.

    The arguments are supplied by the test harness (presumably pytest
    fixtures -- confirm against the conftest).
    """
    from outrigger.io.bam import read_multiple_bams

    def _canonical(frame):
        # Row order is not part of the contract: sort on every column and
        # replace the index with 0..n-1 so the comparison sees contents only.
        ordered = frame.sort_values(frame.columns.tolist())
        ordered.index = range(len(ordered.index))
        return ordered

    observed = read_multiple_bams(bam_filenames, ignore_multimapping)

    per_bam_tables = [pd.read_csv(path)
                      for path in multiple_bams_reads_table_csvs]
    expected = pd.concat(per_bam_tables, ignore_index=True)

    pdt.assert_frame_equal(_canonical(observed), _canonical(expected))
def make_junction_reads_file(self):
    """Build the junction reads table, write it to CSV, and return it.

    When ``self.bams`` is None the table is read from ``self.sj_out_tab``
    with ``star.read_multiple_sj_out_tab``; otherwise it is read from
    ``self.bams`` with ``bam.read_multiple_bams`` (which also receives
    ``self.n_jobs``). Both readers are passed ``self.ignore_multimapping``.

    The table is written to ``self.junction_reads`` without its index.
    The parent directory of that path is created first if it is missing.

    Returns
    -------
    splice_junctions
        The object returned by the reader (presumably a pandas DataFrame;
        only its ``to_csv`` method is used here).
    """
    if self.bams is None:
        util.progress(
            'Reading SJ.out.files and creating a big splice junction'
            ' table of reads spanning exon-exon junctions...')
        splice_junctions = star.read_multiple_sj_out_tab(
            self.sj_out_tab, ignore_multimapping=self.ignore_multimapping)
    else:
        util.progress('Reading bam files and creating a big splice '
                      'junction table of reads spanning exon-exon '
                      'junctions')
        splice_junctions = bam.read_multiple_bams(
            self.bams, self.ignore_multimapping, self.n_jobs)

    dirname = os.path.dirname(self.junction_reads)
    # A bare filename has an empty directory component; os.makedirs('')
    # raises, so only create the directory when there is one to create.
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)

    util.progress('Writing {} ...\n'.format(self.junction_reads))
    splice_junctions.to_csv(self.junction_reads, index=False)
    util.done()
    return splice_junctions