# Example no. 1
# 0
class TestEvidenceGathering(unittest.TestCase):
    """Read collection and contig assembly checks on a ``GenomeEvidence``.

    Every test operates on ``self.ev1``, the evidence object that ``setUp``
    builds for the breakpoint pair reference3:1114 / reference3:2187 using
    the module-level ``BAM_CACHE`` and ``REFERENCE_GENOME``.
    """

    def setUp(self):
        # test loading of evidence for event found on reference3 1114 2187
        first_breakpoint = Breakpoint('reference3', 1114, orient=ORIENT.RIGHT)
        second_breakpoint = Breakpoint('reference3', 2187, orient=ORIENT.RIGHT)
        self.ev1 = GenomeEvidence(
            first_breakpoint,
            second_breakpoint,
            BAM_CACHE,
            REFERENCE_GENOME,
            opposing_strands=True,
            read_length=125,
            stdev_fragment_size=100,
            median_fragment_size=380,
            stdev_count_abnormal=3,
            min_flanking_pairs_resolution=3,
            assembly_min_edge_trim_weight=3,
        )

    def test_collect_split_read(self):
        # A collected split read must end up in the first split-read set.
        ev1_sr = MockRead(
            query_name='HISEQX1_11:3:1105:15351:25130:split',
            reference_id=1,
            # in pysam's CIGAR encoding op 4 is a soft clip and op 7 is a
            # sequence match, i.e. 68S82=
            cigar=[(4, 68), (7, 82)],
            reference_start=1114,
            reference_end=1154,
            query_alignment_start=110,
            query_sequence=
            'TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG',
            query_alignment_end=150,
            flag=113,
            next_reference_id=1,
            next_reference_start=2341)
        self.ev1.collect_split_read(ev1_sr, True)
        self.assertEqual(ev1_sr, list(self.ev1.split_reads[0])[0])

    def test_collect_split_read_failure(self):
        # wrong cigar string: the read has no soft-clipped portion, so
        # collect_split_read is expected to return a falsy value
        ev1_sr = MockRead(
            query_name='HISEQX1_11:4:1203:3062:55280:split',
            reference_id=1,
            cigar=[(7, 110), (7, 40)],
            reference_start=1114,
            reference_end=1154,
            query_alignment_start=110,
            query_sequence=
            'CTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATG',
            query_alignment_end=150,
            flag=371,
            next_reference_id=1,
            next_reference_start=2550)
        self.assertFalse(self.ev1.collect_split_read(ev1_sr, True))

    def test_collect_flanking_pair(self):
        # A single valid pair must be recorded exactly once.
        read = MockRead(reference_id=1,
                        reference_start=2214,
                        reference_end=2364,
                        is_reverse=True,
                        next_reference_id=1,
                        next_reference_start=1120,
                        mate_is_reverse=True)
        # NOTE(review): the mate spans 1120-2364, much longer than the first
        # read (2214-2364) -- confirm this reference_end is intended.
        mate = MockRead(reference_id=1,
                        reference_start=1120,
                        reference_end=2364,
                        is_reverse=True,
                        next_reference_id=1,
                        next_reference_start=1120,
                        mate_is_reverse=True,
                        is_read1=False)
        self.ev1.collect_flanking_pair(read, mate)
        self.assertEqual(1, len(self.ev1.flanking_pairs))

    def test_collect_flanking_pair_not_overlapping_evidence_window(self):
        # first read in pair does not overlap the first evidence window
        # therefore this should return False and not add to the flanking_pairs
        pair = mock_read_pair(
            MockRead(reference_id=1,
                     reference_start=1903,
                     reference_end=2053,
                     is_reverse=True),
            MockRead(reference_id=1,
                     reference_start=2052,
                     reference_end=2053,
                     is_reverse=True))
        self.assertFalse(self.ev1.collect_flanking_pair(*pair))
        self.assertEqual(0, len(self.ev1.flanking_pairs))

    def test_load_evidence(self):
        # Load evidence through the evidence object's own load_evidence()
        # and check the number of reads gathered per category.
        self.ev1.load_evidence()

        def untargeted(reads):
            # keep only reads that do not carry the targeted-alignment tag
            return [
                r for r in reads
                if not r.has_tag(PYSAM_READ_FLAGS.TARGETED_ALIGNMENT)
            ]

        self.assertEqual(2, len(untargeted(self.ev1.split_reads[0])))
        self.assertEqual(7, len(self.ev1.flanking_pairs))
        self.assertEqual(2, len(untargeted(self.ev1.split_reads[1])))

    def test_assemble_split_reads(self):
        # Assemble a contig from hand-picked split reads plus one
        # half-mapped read and compare it with the expected sequence.
        sr1 = MockRead(
            query_name='HISEQX1_11:3:1105:15351:25130:split',
            query_sequence=
            'TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG',
            flag=113)
        sr2 = MockRead(
            query_sequence=
            'GTCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTT',
            flag=121)
        sr3 = MockRead(
            query_sequence=
            'TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG',
            flag=113)
        sr7 = MockRead(
            query_sequence=
            'TGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATA',
            flag=113)
        sr9 = MockRead(
            query_sequence=
            'TGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGC',
            flag=113)
        sr12 = MockRead(
            query_sequence=
            'GATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAG',
            flag=113)
        sr15 = MockRead(
            query_sequence=
            'GTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACA',
            flag=113)
        sr19 = MockRead(
            query_sequence=
            'TGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCA',
            flag=113)
        sr24 = MockRead(
            query_sequence=
            'CTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTT',
            flag=113)
        # Only the subset of split reads needed to make a contig is used.
        # The full set of reads (sr1 .. sr24) produces a different contig
        # from this subset; the full contig with more read support should be
        # CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT
        self.ev1.split_reads = ({sr1},
                                {sr1, sr3, sr7, sr9, sr12, sr15, sr19, sr24})
        self.ev1.half_mapped = (set(), {sr2})
        self.ev1.assemble_contig()
        exp = 'CAACAATATGTAGGAAGCCATTATCTGAAGTGTAAGCAACTGCATAGTGCTATTTTAATTATGCATTGCAGGGAAACTGTGAGCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATC'
        self.assertEqual(exp, self.ev1.contigs[0].seq)
class TestGenomeEvidenceAddReads(unittest.TestCase):
    """How ``GenomeEvidence.collect_flanking_pair`` reacts to read pairs.

    Each test builds a read/mate pair with ``mock_read_pair``, optionally
    tweaks one attribute, and checks that the call either raises
    ``ValueError`` or returns a falsy value.
    """

    def setUp(self):
        left_breakpoint = Breakpoint('1', 1500, orient=ORIENT.LEFT)
        right_breakpoint = Breakpoint('1', 6001, orient=ORIENT.RIGHT)
        bam_cache = BamCache(MockBamFileHandle({'1': 0}))
        self.ge = GenomeEvidence(
            left_breakpoint,
            right_breakpoint,
            bam_cache,
            None,  # reference_genome
            opposing_strands=False,
            read_length=150,
            stdev_fragment_size=500,
            median_fragment_size=100,
            call_error=0,
            stdev_count_abnormal=1,
        )
        # outer windows (901, 1649)  (5852, 6600)
        # inner windows (1351, 1649)  (5852, 6150)

    @staticmethod
    def _build_pair(first_name='test', first_reference=0, **first_kwargs):
        # Shared fixture: a forward read at 900-1000 paired with a reverse
        # mate at 6000-6099. Only the first read is customisable.
        first = MockRead(first_name,
                         first_reference,
                         900,
                         1000,
                         is_reverse=False,
                         **first_kwargs)
        second = MockRead('test', 0, 6000, 6099, is_reverse=True)
        return mock_read_pair(first, second)

    def test_collect_flanking_pair_error_unmapped_read(self):
        # an unmapped first read must be rejected with ValueError
        read, mate = self._build_pair()
        read.is_unmapped = True
        self.assertRaises(ValueError, self.ge.collect_flanking_pair, read,
                          mate)

    def test_collect_flanking_pair_error_mate_unmapped(self):
        # an unmapped mate must be rejected with ValueError
        read, mate = self._build_pair()
        mate.is_unmapped = True
        self.assertRaises(ValueError, self.ge.collect_flanking_pair, read,
                          mate)

    def test_collect_flanking_pair_error_query_names_dont_match(self):
        # the first read is named 'test1' while its mate is named 'test'
        read, mate = self._build_pair(first_name='test1')
        self.assertRaises(ValueError, self.ge.collect_flanking_pair, read,
                          mate)

    def test_collect_flanking_pair_error_template_lengths_dont_match(self):
        # read is built with template_length=50, mate is then set to 55
        read, mate = self._build_pair(template_length=50)
        mate.template_length = 55
        self.assertRaises(ValueError, self.ge.collect_flanking_pair, read,
                          mate)

    def test_collect_flanking_pair_read_low_mq(self):
        # a first read with mapping quality 0 must not be collected
        read, mate = self._build_pair()
        read.mapping_quality = 0
        collected = self.ge.collect_flanking_pair(read, mate)
        self.assertFalse(collected)

    def test_collect_flanking_pair_mate_low_mq(self):
        # a mate with mapping quality 0 must not be collected
        read, mate = self._build_pair()
        mate.mapping_quality = 0
        collected = self.ge.collect_flanking_pair(read, mate)
        self.assertFalse(collected)

    def test_collect_flanking_pair_interchromosomal(self):
        # the first read gets 1 where the other tests pass 0 (presumably the
        # reference id), so the pair is not on a single reference
        read, mate = self._build_pair(first_reference=1)
        collected = self.ge.collect_flanking_pair(read, mate)
        self.assertFalse(collected)