예제 #1
0
파일: qc_test.py 프로젝트: satta/iva
 def test_contigs_and_bases_that_hit_ref(self):
     '''Test _contigs_and_bases_that_hit_ref with two contigs and three hits.

     The expected result is the tuple (193, 2).
     NOTE(review): presumably this is (bases, contigs): ctg1's two hits span
     query positions 1-150 and ctg2's hit spans 42-84, i.e. 150 + 43 = 193
     bases over 2 contigs - confirm against the implementation.
     '''
     def make_hit(*fields):
         # NucmerHit is constructed from one tab-separated line
         return mummer.NucmerHit('\t'.join(fields))

     ctg1_hits = [
         make_hit('1', '100', '1', '100', '100', '100', '100.00', '1008', '762', '1', '1', 'ref1', 'ctg1'),
         make_hit('1', '100', '51', '150', '100', '100', '100.00', '1008', '762', '1', '1', 'ref1', 'ctg1'),
     ]
     ctg2_hits = [
         make_hit('1', '42', '42', '84', '42', '84', '100.00', '42', '84', '1', '1', 'ref2', 'ctg2'),
     ]
     self.qc.assembly_vs_ref_mummer_hits = {'ctg1': ctg1_hits, 'ctg2': ctg2_hits}

     expected = (193, 2)
     self.assertEqual(expected, self.qc._contigs_and_bases_that_hit_ref())
예제 #2
0
파일: qc_test.py 프로젝트: satta/iva
 def test_get_unique_and_repetitive_from_contig_hits(self):
     '''Test _get_unique_and_repetitive_from_contig_hits on five hits of one query.

     All hits share the same reference columns and differ only in the third
     and fourth fields. The three hits whose ranges there intersect
     (21-30, 25-35, 29-40) are expected in the repetitive list; the other two
     (1-10, 70-90) are expected in the unique list.
     '''
     def qry1_hit(start, end):
         fields = ['1', '10', start, end, '100', '100', '100.00', '100', '100', '1', '+', 'ref1', 'qry1']
         return mummer.NucmerHit('\t'.join(fields))

     spans = [('1', '10'), ('21', '30'), ('25', '35'), ('29', '40'), ('70', '90')]
     hits = [qry1_hit(start, end) for start, end in spans]
     first, second, third, fourth, fifth = hits

     expect_unique = [first, fifth]
     expect_repetitive = [second, third, fourth]

     got_unique, got_repetitive = self.qc._get_unique_and_repetitive_from_contig_hits(hits)
     self.assertEqual(expect_unique, got_unique)
     self.assertEqual(expect_repetitive, got_repetitive)
예제 #3
0
파일: qc_test.py 프로젝트: satta/iva
    def test_contig_placement_in_reference(self):
        '''Test _contig_placement_in_reference with one hit, then with two hits.

        Each expected entry is a 5-tuple: an Interval, the reference name, a
        second Interval, and two booleans.
        NOTE(review): the intervals look like 0-based versions of the query
        and reference columns respectively; the first boolean is presumably
        "same strand" and the second presumably marks a contig with more than
        one placement (it flips to True once a second hit is passed in) -
        confirm against the implementation.
        '''
        Interval = pyfastaq.intervals.Interval

        # Third/fourth fields run high-to-low (100 -> 10) in this hit
        hit_on_ref1 = mummer.NucmerHit('\t'.join(
            ['1', '90', '100', '10', '100', '100', '100.00', '100', '100', '1', '+', 'ref1', 'qry1']
        ))
        hit_on_ref2 = mummer.NucmerHit('\t'.join(
            ['17', '36', '21', '40', '100', '100', '100.00', '100', '100', '1', '+', 'ref2', 'qry1']
        ))

        expected_single = [(Interval(9, 99), 'ref1', Interval(0, 89), False, False)]
        self.assertEqual(self.qc._contig_placement_in_reference([hit_on_ref1]), expected_single)

        expected_pair = [
            (Interval(9, 99), 'ref1', Interval(0, 89), False, True),
            (Interval(20, 39), 'ref2', Interval(16, 35), True, True),
        ]
        self.assertEqual(self.qc._contig_placement_in_reference([hit_on_ref1, hit_on_ref2]), expected_pair)
예제 #4
0
파일: qc_test.py 프로젝트: satta/iva
    def test_mummer_coords_file_to_dict(self):
        '''Test _mummer_coords_file_to_dict against a coords fixture file.

        The result is expected to be a dict whose keys are the last field of
        each hit line ('qry1', 'qry2') and whose values are lists of NucmerHit
        objects.
        '''
        def make_hit(*fields):
            # NucmerHit is constructed from one tab-separated line
            return mummer.NucmerHit('\t'.join(fields))

        qry1_hits = [
            make_hit('1', '100', '51', '150', '100', '100', '100.00', '1008', '762', '1', '1', 'ref1', 'qry1'),
            make_hit('300', '500', '351', '550', '100', '100', '100.00', '1008', '762', '1', '1', 'ref2', 'qry1'),
        ]
        qry2_hits = [
            make_hit('1', '1000', '1', '1000', '1000', '1000', '100.00', '1000', '1542', '1', '1', 'ref2', 'qry2'),
        ]
        expected = {'qry1': qry1_hits, 'qry2': qry2_hits}

        coords_file = os.path.join(data_dir, 'qc_test.mummer_coords_file_to_dict.coords')
        got = self.qc._mummer_coords_file_to_dict(coords_file)
        self.assertEqual(expected, got)
예제 #5
0
파일: qc_test.py 프로젝트: satta/iva
    def test_hash_nucmer_hits_by_ref(self):
        '''Test _hash_nucmer_hits_by_ref regroups hits under their reference name.

        The input dict has a single key ('x') holding four hits; the expected
        output is keyed by the second-to-last field of each hit ('ref1' or
        'ref2'), with input order preserved within each list.
        '''
        def hit_on(ref_name):
            # All hits are identical apart from the reference name field
            fields = ['1', '10', '20', '30', '10', '10', '100.00', '1008', '762', '1', '1', ref_name, 'qry1']
            return mummer.NucmerHit('\t'.join(fields))

        ref1_hits = [hit_on('ref1') for _ in range(3)]
        ref2_hits = [hit_on('ref2')]

        input_dict = {'x': ref1_hits + ref2_hits}
        expected = {'ref1': ref1_hits, 'ref2': ref2_hits}

        got = self.qc._hash_nucmer_hits_by_ref(input_dict)
        self.assertEqual(expected, got)
예제 #6
0
 def test_ref_coords(self):
     '''Test ref_coords gives Interval(0, 99) whichever way round the first two fields are.

     One line lists them as 1 -> 100 and the other as 100 -> 1; both are
     expected to produce the same interval.
     '''
     ascending = '\t'.join(['1', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref'])
     descending = '\t'.join(['100', '1', '100', '1', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref'])

     expected = pyfastaq.intervals.Interval(0, 99)
     for line in (ascending, descending):
         hit = mummer.NucmerHit(line)
         self.assertEqual(expected, hit.ref_coords())
예제 #7
0
파일: qc_test.py 프로젝트: satta/iva
    def test_calculate_refseq_assembly_stats(self):
        '''Test _calculate_refseq_assembly_stats over four references of length 1000.

        Scenarios covered by the fixture:
          ref1 - one hit spanning 1-1000
          ref2 - one hit spanning 1-800
          ref3 - two hits from different contigs, 1-500 and 501-1000
          ref4 - no hits at all
        The method stores its result in self.qc.refseq_assembly_stats.
        '''
        def make_hit(*fields):
            # NucmerHit is constructed from one tab-separated line
            return mummer.NucmerHit('\t'.join(fields))

        def stats(hits, bases_assembled, assembled, assembled_ok, longest_matching_contig):
            # One per-reference entry of the expected stats dict
            return {
                'hits': hits,
                'bases_assembled': bases_assembled,
                'assembled': assembled,
                'assembled_ok': assembled_ok,
                'longest_matching_contig': longest_matching_contig,
            }

        # Show the full dict diff if the final assertion fails
        self.maxDiff = None

        self.qc.ref_ids = ['ref1', 'ref2', 'ref3', 'ref4']
        self.qc.ref_lengths = dict.fromkeys(self.qc.ref_ids, 1000)
        all_hits = [
            make_hit('1', '1000', '1', '1000', '1000', '1000', '100.00', '1000', '1000', '1', '1', 'ref1', 'ctg1'),
            make_hit('1', '800', '1', '800', '800', '800', '100.00', '800', '800', '1', '1', 'ref2', 'ctg2'),
            make_hit('1', '500', '1', '500', '1000', '500', '100.00', '500', '500', '1', '1', 'ref3', 'ctg3.1'),
            make_hit('501', '1000', '1', '500', '1000', '500', '100.00', '500', '500', '1', '1', 'ref3', 'ctg3.2'),
        ]
        self.qc.assembly_vs_ref_mummer_hits = {'x': all_hits}

        expected = {
            'ref1': stats(1, 1000, True, True, 1000),
            'ref2': stats(1, 800, False, False, 800),
            'ref3': stats(2, 1000, True, False, 500),
            'ref4': stats(0, 0, False, False, 0),
        }

        self.qc._calculate_refseq_assembly_stats()
        self.assertEqual(expected, self.qc.refseq_assembly_stats)
예제 #8
0
파일: qc_test.py 프로젝트: satta/iva
    def test_get_overlapping_qry_hits(self):
        '''Test _get_overlapping_qry_hits for each of five hits in turn.

        The hits differ only in their third and fourth fields. For each hit
        the expected result is the list of the *other* hits whose range there
        intersects it; the hit itself is never included.
        '''
        def qry1_hit(start, end):
            fields = ['1', '10', start, end, '100', '100', '100.00', '100', '100', '1', '+', 'ref1', 'qry1']
            return mummer.NucmerHit('\t'.join(fields))

        spans = [('1', '10'), ('21', '30'), ('25', '35'), ('29', '40'), ('70', '90')]
        hits = [qry1_hit(start, end) for start, end in spans]
        a, b, c, d, e = hits

        # expected[i] is the answer when hits[i] is the probe
        expected = [
            [],
            [c, d],
            [b, d],
            [b, c],
            [],
        ]

        self.assertEqual(len(hits), len(expected))

        for probe, overlapping in zip(hits, expected):
            self.assertEqual(self.qc._get_overlapping_qry_hits(hits, probe), overlapping)
예제 #9
0
    def test_is_self_hit(self):
        '''Test is_self_hit on one positive case and four near-miss negatives.

        Only the first line is expected to count as a self hit. Each of the
        others changes exactly one thing relative to it: an end coordinate,
        a start coordinate, the final name field, or the identity field.
        '''
        cases = [
            (['1', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref'], True),
            (['1', '101', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref'], False),
            (['2', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref'], False),
            (['1', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref2'], False),
            (['1', '100', '1', '100', '100', '100', '99.9', '1000', '1000', '1', '1', 'ref', 'ref'], False),
        ]

        for fields, expected in cases:
            hit = mummer.NucmerHit('\t'.join(fields))
            self.assertEqual(hit.is_self_hit(), expected)
예제 #10
0
파일: qc_test.py 프로젝트: satta/iva
 def test_get_contig_hits_to_reference(self):
     '''Test _get_contig_hits_to_reference using the reference/assembly fixture files.

     The method is expected to populate self.qc.assembly_vs_ref_mummer_hits
     with a dict from contig name to a list of NucmerHit objects.
     '''
     def make_hit(*fields):
         # NucmerHit is constructed from one tab-separated line
         return mummer.NucmerHit('\t'.join(fields))

     # Setup order is kept as-is: _set_ref_fa_data() runs right after ref_fasta is set
     self.qc.ref_fasta = os.path.join(data_dir, 'qc_test.reference.fa')
     self.qc._set_ref_fa_data()
     self.qc.ref_gff = os.path.join(data_dir, 'qc_test.reference.cds.gff')
     self.qc.assembly_fasta = os.path.join(data_dir, 'qc_test.assembly.fasta')
     self.qc._get_contig_hits_to_reference()

     expected = {
         'A:10-1017:+': [
             make_hit('10', '1017', '1', '1008', '1008', '1008', '100.00', '1027', '1008', '1', '+', 'A', 'A:10-1017:+'),
             make_hit('10', '240', '1', '231', '231', '231', '100.00', '240', '1008', '1', '+', 'A0', 'A:10-1017:+'),
         ],
         'B:1-1778:-': [
             make_hit('1', '1778', '1778', '1', '1778', '1778', '100.00', '1778', '1778', '1', '+', 'B', 'B:1-1778:-'),
         ],
         'C:1-1413:+,E:1-200:-': [
             make_hit('1', '1413', '1', '1413', '1413', '1413', '100.00', '1413', '1613', '1', '+', 'C', 'C:1-1413:+,E:1-200:-'),
             make_hit('1', '200', '1414', '1613', '200', '200', '100.00', '890', '1613', '1', '+', 'E', 'C:1-1413:+,E:1-200:-'),
         ],
         'D:1-1000:+': [
             make_hit('1', '1000', '1', '1000', '1000', '1000', '100.00', '1565', '1000', '1', '+', 'D', 'D:1-1000:+'),
         ],
         'E:400-700:-': [
             make_hit('400', '700', '301', '1', '301', '301', '100.00', '890', '301', '1', '+', 'E', 'E:400-700:-'),
         ],
         'F:1-2341:+': [
             make_hit('1', '2341', '1', '2341', '2341', '2341', '100.00', '2341', '2341', '1', '+', 'F', 'F:1-2341:+'),
         ],
         'F:1-2341:-': [
             make_hit('1', '2341', '2341', '1', '2341', '2341', '100.00', '2341', '2341', '1', '+', 'F', 'F:1-2341:-'),
         ],
     }
     self.assertDictEqual(expected, self.qc.assembly_vs_ref_mummer_hits)
예제 #11
0
    def test_to_graph_edge(self):
        '''Test to_graph_edge on hits that must be rejected and hits that yield an Edge.

        The first ten hit lines are each expected to give None, for the reason
        noted beside the line. The remaining eight are each expected to give
        the edge.Edge at the same position in the expected list.
        '''
        hits = [
            '\t'.join(['781', '981', '10', '210', '200', '200', '98', '1000', '1000', '1', '1', 'ref', 'qry']), # %id too low
            '\t'.join(['781', '980', '10', '210', '199', '200', '98', '1000', '1000', '1', '1', 'ref', 'qry']), # hit too short
            '\t'.join(['781', '981', '10', '209', '200', '199', '98', '1000', '1000', '1', '1', 'ref', 'qry']), # hit too short
            '\t'.join(['1', '200', '1', '200', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref', 'qry']), # bad orientation
            '\t'.join(['200', '1', '200', '1', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref', 'qry']), # bad orientation
            '\t'.join(['800', '1000', '800', '1000', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref', 'qry']), # bad orientation
            '\t'.join(['1000', '800', '1000', '800', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref', 'qry']), # bad orientation
            '\t'.join(['300', '500', '300', '500', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref', 'qry']), # not at ends
            '\t'.join(['1', '1000', '1', '1000', '1000', '1000', '100.00', '1000', '1000', '1', '1', 'ref', 'qry']), # whole contigs hit
            '\t'.join(['1', '500', '1', '1000', '500', '500', '100.00', '500', '1000', '1', '1', 'ref1', 'qry1']), # contained contig
            '\t'.join(['1', '200', '791', '991', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref2', 'qry2']),
            '\t'.join(['781', '981', '10', '210', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref3', 'qry3']),
            '\t'.join(['991', '791', '781', '981', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref4', 'qry4']),
            '\t'.join(['210', '10', '15', '215', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref5', 'qry5']),
            '\t'.join(['781', '981', '991', '791', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref6', 'qry6']),
            '\t'.join(['10', '210', '215', '5', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref7', 'qry7']),
            '\t'.join(['210', '10', '980', '780', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref8', 'qry8']),
            '\t'.join(['995', '795', '215', '15', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref9', 'qry9']),
        ]

        # expected[i] is the result for hits[i]; None means no edge is produced
        expected = [
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            edge.Edge('qry2', 790, 990, 'ref2', 0, 199),
            edge.Edge('ref3', 780, 980, 'qry3', 9, 209),
            edge.Edge('qry4', 780, 980, 'ref4', 990, 790),
            edge.Edge('ref5', 209, 9, 'qry5', 14, 214),
            edge.Edge('ref6', 780, 980, 'qry6', 990, 790),
            edge.Edge('qry7', 214, 4, 'ref7', 9, 209),
            edge.Edge('ref8', 209, 9, 'qry8', 979, 779),
            edge.Edge('qry9', 214, 14, 'ref9', 994, 794),
        ]

        # Use a unittest assertion rather than a bare `assert`: bare asserts are
        # removed under `python -O`, and zip() below would then silently drop
        # any unmatched trailing entries.
        self.assertEqual(len(expected), len(hits))

        for hit_line, expected_edge in zip(hits, expected):
            m = mummer.NucmerHit(hit_line)
            self.assertEqual(m.to_graph_edge(), expected_edge)
예제 #12
0
    def test_is_at_ends(self):
        '''Test _is_at_ends with use_qry False and then True.

        Each case is (hit fields, use_qry, expected mummer.HIT_* constant).
        The first six cases pass use_qry=False and vary the first two fields;
        the last six pass use_qry=True and vary the third and fourth fields.
        '''
        cases = [
            (['1', '100', '200', '300', '100', '100', '100.00', '1000', '500', '1', '1', 'ref', 'qry'], False, mummer.HIT_AT_START),
            (['51', '151', '200', '300', '100', '100', '100.00', '1000', '500', '1', '1', 'ref', 'qry'], False, mummer.HIT_NO_ENDS),
            (['900', '1000', '200', '300', '100', '100', '100.00', '1000', '500', '1', '1', 'ref', 'qry'], False, mummer.HIT_AT_END),
            (['1000', '900', '200', '300', '100', '100', '100.00', '1000', '500', '1', '1', 'ref', 'qry'], False, mummer.HIT_AT_END),
            (['850', '949', '200', '300', '100', '100', '100.00', '1000', '500', '1', '1', 'ref', 'qry'], False, mummer.HIT_NO_ENDS),
            (['42', '992', '200', '1152', '950', '950', '100.00', '1000', '5000', '1', '1', 'ref', 'qry'], False, mummer.HIT_AT_BOTH_ENDS),
            (['200', '300', '1', '100', '100', '100', '100.00', '500', '1000', '1', '1', 'ref', 'qry'], True, mummer.HIT_AT_START),
            (['200', '300', '51', '151', '100', '100', '100.00', '500', '1000', '1', '1', 'ref', 'qry'], True, mummer.HIT_NO_ENDS),
            (['200', '300', '900', '1000', '100', '100', '100.00', '500', '1000', '1', '1', 'ref', 'qry'], True, mummer.HIT_AT_END),
            (['200', '300', '1000', '900', '100', '100', '100.00', '500', '1000', '1', '1', 'ref', 'qry'], True, mummer.HIT_AT_END),
            (['200', '300', '850', '949', '100', '100', '100.00', '500', '1000', '1', '1', 'ref', 'qry'], True, mummer.HIT_NO_ENDS),
            (['200', '300', '42', '992', '950', '950', '100.00', '500', '1000', '1', '1', 'ref', 'qry'], True, mummer.HIT_AT_BOTH_ENDS),
        ]

        for fields, use_qry, expected in cases:
            hit = mummer.NucmerHit('\t'.join(fields))
            self.assertEqual(hit._is_at_ends(use_qry=use_qry), expected)
예제 #13
0
 def test_on_same_strand(self):
     '''Test on_same_strand for every combination of coordinate direction.

     The first pair of fields and the second pair each run either low-to-high
     or high-to-low. on_same_strand() is expected to be truthy when both
     pairs run the same way and falsy when they run opposite ways.
     '''
     # Fields shared by every case; only the four leading coordinates vary
     shared_fields = ['100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref']
     cases = [
         (['1', '100', '1', '100'], True),
         (['100', '1', '100', '1'], True),
         (['1', '100', '100', '1'], False),
         (['100', '1', '1', '100'], False),
     ]

     for coords, same_strand in cases:
         hit = mummer.NucmerHit('\t'.join(coords + shared_fields))
         check = self.assertTrue if same_strand else self.assertFalse
         check(hit.on_same_strand())