def test_coverages(self):
    """Test that the regions and calculated coverages are the same for both
    the bfx and pbpy summarize_coverage results.

    The pbpy records are read from ``self.gff_path`` and indexed by
    ``(seqid, start, end)``.  Every record produced by
    ``summarize_coverage.generate_gff_records`` must have a counterpart in
    that index with matching coverage attributes and GFF columns.  Once all
    generated records have been consumed, no pbpy record may be left over
    (restricted to ``self.selected_reference`` when one is set).
    """
    # Read the pbpy gff into a dictionary for comparison. Only the first
    # whitespace-delimited token of the seqid is used in the key.
    pbpy_gff_reader = GffIO.GffReader(self.gff_path)
    pbpy_gff_records = {}
    for gff_record in pbpy_gff_reader:
        record_key = (gff_record.seqid.split()[0],
                      gff_record.start, gff_record.end)
        pbpy_gff_records[record_key] = gff_record

    # Recapitulate the first few steps of summarize_coverage.main
    # (the first element returned by _get_readers is not needed here).
    _ds_reader, readers = self._get_readers()
    interval_lists = summarize_coverage.build_interval_lists(readers)
    get_region_size_frozen = functools.partial(
        summarize_coverage.get_region_size,
        num_refs=len(interval_lists),
        region_size=0,
        num_regions=500,
        force_num_regions=False)

    for ref_group_id in sorted(interval_lists):
        gff_generator = summarize_coverage.generate_gff_records(
            interval_lists[ref_group_id], readers, ref_group_id,
            get_region_size_frozen, {})

        for bfx_gff_record in gff_generator:
            bfx_key = (bfx_gff_record.seqid,
                       bfx_gff_record.start, bfx_gff_record.end)
            self.assertIn(bfx_key, pbpy_gff_records)
            # Pop the match so that whatever remains at the end is exactly
            # the set of pbpy records with no bfx counterpart.
            pbpy_gff_record = pbpy_gff_records.pop(bfx_key)

            self.assertEqual(pbpy_gff_record.cov, bfx_gff_record.cov)
            self.assertEqual(pbpy_gff_record.gaps, bfx_gff_record.gaps)

            pbpy_cov2 = [float(k) for k in pbpy_gff_record.cov2.split(',')]
            bfx_cov2 = [float(k) for k in bfx_gff_record.cov2.split(',')]
            # zip() stops at the shorter input, so without this check a
            # record with missing or extra cov2 values would still pass.
            self.assertEqual(len(pbpy_cov2), len(bfx_cov2))
            for pbpy_value, bfx_value in zip(pbpy_cov2, bfx_cov2):
                self.assertAlmostEqual(pbpy_value, bfx_value)

            self.assertEqual(pbpy_gff_record.source, bfx_gff_record.source)
            self.assertEqual(pbpy_gff_record.type, bfx_gff_record.type)
            self.assertEqual(pbpy_gff_record.score, bfx_gff_record.score)
            self.assertEqual(pbpy_gff_record.strand, bfx_gff_record.strand)
            self.assertEqual(pbpy_gff_record.phase, bfx_gff_record.phase)

    # When a single reference was selected, only leftovers for that
    # reference count as unmatched; otherwise every leftover does.
    if self.selected_reference is not None:
        remaining_pbpy_records = {
            record_key: record
            for record_key, record in pbpy_gff_records.items()
            if record_key[0] == self.selected_reference
        }
    else:
        remaining_pbpy_records = pbpy_gff_records

    self.assertEqual(
        len(remaining_pbpy_records), 0,
        "pbpy records with no bfx counterpart: %r"
        % (list(remaining_pbpy_records),))
def test_metadata(self):
    """Test that the metadata lines match those from the pbpy version of
    summarize_coverage.

    The headers of the GFF at ``self.gff_path`` are compared with the
    output of ``summarize_coverage.get_metadata_lines``.  The date and
    command lines are compared on their first space-delimited token only;
    the source line and every line after the command line must be equal.
    """
    _dataset_reader, bam_readers = self._get_readers()
    bfx_lines = summarize_coverage.get_metadata_lines(bam_readers, {})
    pbpy_lines = GffIO.GffReader(self.gff_path).headers

    def first_token(line):
        # Text before the first space of a metadata line.
        return line.split(' ')[0]

    # GffWriter handles this first line, not get_metadata_lines. So just
    # make sure it's there in the pbpy file, then set it aside so both
    # sequences share the same indices.
    self.assertEqual(pbpy_lines[0], "##gff-version 3")
    pbpy_body = pbpy_lines[1:]
    self.assertEqual(len(pbpy_body), len(bfx_lines))

    # Date line: only the leading token is compared.
    self.assertEqual(first_token(bfx_lines[0]), first_token(pbpy_body[0]))

    # Source line: compared in full.
    self.assertEqual(bfx_lines[1], pbpy_body[1])

    # Command line: only the leading token is compared.
    self.assertEqual(first_token(bfx_lines[2]), first_token(pbpy_body[2]))

    # Everything after the command line must be identical, pairwise.
    # The lengths were asserted equal above, so zip() drops nothing.
    for pbpy_line, bfx_line in zip(pbpy_body[3:], bfx_lines[3:]):
        self.assertEqual(pbpy_line, bfx_line)