class TestEvidenceGathering(unittest.TestCase):
    """Exercise read collection and contig assembly on a ``GenomeEvidence`` object.

    Every test starts from the evidence object built in ``setUp`` for the
    event found on reference3 between positions 1114 and 2187.
    """

    def setUp(self):
        """Build the evidence object shared by the tests in this class."""
        # test loading of evidence for event found on reference3 1114 2187
        self.ev1 = GenomeEvidence(
            Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
            Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
            BAM_CACHE,
            REFERENCE_GENOME,
            opposing_strands=True,
            read_length=125,
            stdev_fragment_size=100,
            median_fragment_size=380,
            stdev_count_abnormal=3,
            min_flanking_pairs_resolution=3,
            assembly_min_edge_trim_weight=3,
        )

    @staticmethod
    def _count_untargeted(reads):
        """Return how many of ``reads`` lack the TARGETED_ALIGNMENT tag."""
        return sum(
            1 for read in reads
            if not read.has_tag(PYSAM_READ_FLAGS.TARGETED_ALIGNMENT)
        )

    def test_collect_split_read(self):
        """A collected split read is stored in the first split-read set."""
        ev1_sr = MockRead(
            query_name='HISEQX1_11:3:1105:15351:25130:split',
            reference_id=1,
            cigar=[(4, 68), (7, 82)],
            reference_start=1114,
            reference_end=1154,
            query_alignment_start=110,
            query_sequence=
            'TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG',
            query_alignment_end=150,
            flag=113,
            next_reference_id=1,
            next_reference_start=2341)
        self.ev1.collect_split_read(ev1_sr, True)
        self.assertEqual(ev1_sr, list(self.ev1.split_reads[0])[0])

    def test_collect_split_read_failure(self):
        """``collect_split_read`` returns a falsy value for the read below."""
        # wrong cigar string
        ev1_sr = MockRead(
            query_name='HISEQX1_11:4:1203:3062:55280:split',
            reference_id=1,
            cigar=[(7, 110), (7, 40)],
            reference_start=1114,
            reference_end=1154,
            query_alignment_start=110,
            query_sequence=
            'CTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATG',
            query_alignment_end=150,
            flag=371,
            next_reference_id=1,
            next_reference_start=2550)
        self.assertFalse(self.ev1.collect_split_read(ev1_sr, True))

    def test_collect_flanking_pair(self):
        """Collecting one flanking pair leaves exactly one entry in ``flanking_pairs``."""
        # NOTE(review): reference_end=2364 on the second read (which starts at
        # 1120) looks copied from the first read -- confirm it is intentional.
        self.ev1.collect_flanking_pair(
            MockRead(
                reference_id=1,
                reference_start=2214,
                reference_end=2364,
                is_reverse=True,
                next_reference_id=1,
                next_reference_start=1120,
                mate_is_reverse=True),
            MockRead(
                reference_id=1,
                reference_start=1120,
                reference_end=2364,
                is_reverse=True,
                next_reference_id=1,
                next_reference_start=1120,
                mate_is_reverse=True,
                is_read1=False))
        self.assertEqual(1, len(self.ev1.flanking_pairs))

    def test_collect_flanking_pair_not_overlapping_evidence_window(self):
        """A pair outside the first evidence window is rejected and not stored."""
        # first read in pair does not overlap the first evidence window
        # therefore this should return False and not add to the flanking_pairs
        pair = mock_read_pair(
            MockRead(reference_id=1, reference_start=1903, reference_end=2053, is_reverse=True),
            MockRead(reference_id=1, reference_start=2052, reference_end=2053, is_reverse=True))
        self.assertFalse(self.ev1.collect_flanking_pair(*pair))
        self.assertEqual(0, len(self.ev1.flanking_pairs))

    def test_load_evidence(self):
        """``load_evidence`` yields the expected split-read and flanking-pair counts."""
        self.ev1.load_evidence()
        # reads carrying the TARGETED_ALIGNMENT tag are excluded from both split-read counts
        self.assertEqual(2, self._count_untargeted(self.ev1.split_reads[0]))
        self.assertEqual(7, len(self.ev1.flanking_pairs))
        self.assertEqual(2, self._count_untargeted(self.ev1.split_reads[1]))

    def test_assemble_split_reads(self):
        """``assemble_contig`` builds the expected contig from a subset of reads."""
        sr1 = MockRead(
            query_name='HISEQX1_11:3:1105:15351:25130:split',
            query_sequence=
            'TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG',
            flag=113)
        sr2 = MockRead(
            query_sequence=
            'GTCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTT',
            flag=121)
        sr3 = MockRead(
            query_sequence=
            'TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG',
            flag=113)
        sr7 = MockRead(
            query_sequence=
            'TGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATA',
            flag=113)
        sr9 = MockRead(
            query_sequence=
            'TGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGC',
            flag=113)
        sr12 = MockRead(
            query_sequence=
            'GATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAG',
            flag=113)
        sr15 = MockRead(
            query_sequence=
            'GTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACA',
            flag=113)
        sr19 = MockRead(
            query_sequence=
            'TGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCA',
            flag=113)
        sr24 = MockRead(
            query_sequence=
            'CTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTT',
            flag=113)
        # subset needed to make a contig
        self.ev1.split_reads = ({sr1}, {sr1, sr3, sr7, sr9, sr12, sr15, sr19, sr24})
        # NOTE: the full set of reads (sr1, sr3, sr5 .. sr24) produces a
        # different contig from this subset.
        # full contig with more read support should be
        # CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT
        self.ev1.half_mapped = (set(), {sr2})
        self.ev1.assemble_contig()
        exp = 'CAACAATATGTAGGAAGCCATTATCTGAAGTGTAAGCAACTGCATAGTGCTATTTTAATTATGCATTGCAGGGAAACTGTGAGCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATC'
        self.assertEqual(exp, self.ev1.contigs[0].seq)
class TestGenomeEvidenceAddReads(unittest.TestCase):
    """Checks on how ``GenomeEvidence.collect_flanking_pair`` treats one read pair."""

    def setUp(self):
        first_breakpoint = Breakpoint('1', 1500, orient=ORIENT.LEFT)
        second_breakpoint = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
        bam_cache = BamCache(MockBamFileHandle({'1': 0}))
        self.ge = GenomeEvidence(
            first_breakpoint,
            second_breakpoint,
            bam_cache,
            None,  # reference_genome
            opposing_strands=False,
            read_length=150,
            stdev_fragment_size=500,
            median_fragment_size=100,
            call_error=0,
            stdev_count_abnormal=1,
        )
        # outer windows (901, 1649) (5852, 6600)
        # inner windows (1351, 1649) (5852, 6150)

    @staticmethod
    def _make_pair(query_name='test', reference_id=0, **read_extras):
        # Build the (read, mate) pair every test starts from; only the first
        # read's name, reference id and extra keyword arguments vary per test.
        upstream = MockRead(query_name, reference_id, 900, 1000, is_reverse=False, **read_extras)
        downstream = MockRead('test', 0, 6000, 6099, is_reverse=True)
        return mock_read_pair(upstream, downstream)

    def test_collect_flanking_pair_error_unmapped_read(self):
        # a read flagged as unmapped must raise ValueError
        first, second = self._make_pair()
        first.is_unmapped = True
        self.assertRaises(ValueError, self.ge.collect_flanking_pair, first, second)

    def test_collect_flanking_pair_error_mate_unmapped(self):
        # a mate flagged as unmapped must raise ValueError
        first, second = self._make_pair()
        second.is_unmapped = True
        self.assertRaises(ValueError, self.ge.collect_flanking_pair, first, second)

    def test_collect_flanking_pair_error_query_names_dont_match(self):
        # differing query names ('test1' vs 'test') must raise ValueError
        first, second = self._make_pair(query_name='test1')
        self.assertRaises(ValueError, self.ge.collect_flanking_pair, first, second)

    def test_collect_flanking_pair_error_template_lengths_dont_match(self):
        # the read is built with template_length=50 and the mate is set to 55
        first, second = self._make_pair(template_length=50)
        second.template_length = 55
        self.assertRaises(ValueError, self.ge.collect_flanking_pair, first, second)

    def test_collect_flanking_pair_read_low_mq(self):
        # mapping quality 0 on the read: the pair is not collected
        first, second = self._make_pair()
        first.mapping_quality = 0
        collected = self.ge.collect_flanking_pair(first, second)
        self.assertFalse(collected)

    def test_collect_flanking_pair_mate_low_mq(self):
        # mapping quality 0 on the mate: the pair is not collected
        first, second = self._make_pair()
        second.mapping_quality = 0
        collected = self.ge.collect_flanking_pair(first, second)
        self.assertFalse(collected)

    def test_collect_flanking_pair_interchromosomal(self):
        # read on reference id 1, mate on reference id 0: the pair is not collected
        first, second = self._make_pair(reference_id=1)
        collected = self.ge.collect_flanking_pair(first, second)
        self.assertFalse(collected)