def test_contigs_and_bases_that_hit_ref(self):
    '''_contigs_and_bases_that_hit_ref gives (193, 2) for the three fixture hits'''
    def make_hit(*columns):
        # A NucmerHit is built from one tab-separated coords line.
        return mummer.NucmerHit('\t'.join(columns))

    # Two hits stored under ctg1 (third/fourth columns 1-100 and 51-150)
    # and a single hit stored under ctg2 (third/fourth columns 42-84).
    ctg1_hits = [
        make_hit('1', '100', '1', '100', '100', '100', '100.00', '1008', '762', '1', '1', 'ref1', 'ctg1'),
        make_hit('1', '100', '51', '150', '100', '100', '100.00', '1008', '762', '1', '1', 'ref1', 'ctg1'),
    ]
    ctg2_hits = [
        make_hit('1', '42', '42', '84', '42', '84', '100.00', '42', '84', '1', '1', 'ref2', 'ctg2'),
    ]
    self.qc.assembly_vs_ref_mummer_hits = {'ctg1': ctg1_hits, 'ctg2': ctg2_hits}

    got = self.qc._contigs_and_bases_that_hit_ref()
    self.assertEqual((193, 2), got)
def test_get_unique_and_repetitive_from_contig_hits(self):
    '''_get_unique_and_repetitive_from_contig_hits splits hits into (unique, repetitive)'''
    # The five hits are identical except for their third and fourth columns
    # (presumably the query start/end - confirm against NucmerHit).
    qry_ranges = [('1', '10'), ('21', '30'), ('25', '35'), ('29', '40'), ('70', '90')]
    hits = [
        mummer.NucmerHit('\t'.join(
            ['1', '10', start, end, '100', '100', '100.00', '100', '100', '1', '+', 'ref1', 'qry1']
        ))
        for start, end in qry_ranges
    ]
    first, second, third, fourth, fifth = hits

    # Hits 2-4 have mutually overlapping ranges; hits 1 and 5 stand alone.
    expect_unique = [first, fifth]
    expect_repetitive = [second, third, fourth]

    got_unique, got_repetitive = self.qc._get_unique_and_repetitive_from_contig_hits(hits)

    self.assertEqual(expect_unique, got_unique)
    self.assertEqual(expect_repetitive, got_repetitive)
def test_contig_placement_in_reference(self):
    '''_contig_placement_in_reference returns one placement tuple per hit'''
    Interval = pyfastaq.intervals.Interval

    hit_on_ref1 = mummer.NucmerHit('\t'.join(
        ['1', '90', '100', '10', '100', '100', '100.00', '100', '100', '1', '+', 'ref1', 'qry1']
    ))
    hit_on_ref2 = mummer.NucmerHit('\t'.join(
        ['17', '36', '21', '40', '100', '100', '100.00', '100', '100', '1', '+', 'ref2', 'qry1']
    ))

    # A single hit: both trailing flags of the placement are False.
    expected_single = [
        (Interval(9, 99), 'ref1', Interval(0, 89), False, False),
    ]
    self.assertEqual(self.qc._contig_placement_in_reference([hit_on_ref1]), expected_single)

    # With a second hit supplied, the last flag of the first placement
    # becomes True and the ref2 placement carries (True, True).
    expected_pair = [
        (Interval(9, 99), 'ref1', Interval(0, 89), False, True),
        (Interval(20, 39), 'ref2', Interval(16, 35), True, True),
    ]
    self.assertEqual(
        self.qc._contig_placement_in_reference([hit_on_ref1, hit_on_ref2]),
        expected_pair,
    )
def test_mummer_coords_file_to_dict(self):
    '''_mummer_coords_file_to_dict loads a coords file into a dict of hit lists'''
    def make_hit(*columns):
        # A NucmerHit is built from one tab-separated coords line.
        return mummer.NucmerHit('\t'.join(columns))

    coords_path = os.path.join(data_dir, 'qc_test.mummer_coords_file_to_dict.coords')

    # Expected keys match the last column of each hit line.
    qry1_hits = [
        make_hit('1', '100', '51', '150', '100', '100', '100.00', '1008', '762', '1', '1', 'ref1', 'qry1'),
        make_hit('300', '500', '351', '550', '100', '100', '100.00', '1008', '762', '1', '1', 'ref2', 'qry1'),
    ]
    qry2_hits = [
        make_hit('1', '1000', '1', '1000', '1000', '1000', '100.00', '1000', '1542', '1', '1', 'ref2', 'qry2'),
    ]
    expected = {'qry1': qry1_hits, 'qry2': qry2_hits}

    got = self.qc._mummer_coords_file_to_dict(coords_path)
    self.assertEqual(expected, got)
def test_hash_nucmer_hits_by_ref(self):
    '''_hash_nucmer_hits_by_ref regroups hits under their ref1/ref2 column'''
    def hit_against(ref_column):
        # All fixture hits are identical apart from the second-to-last column.
        return mummer.NucmerHit('\t'.join(
            ['1', '10', '20', '30', '10', '10', '100.00', '1008', '762', '1', '1', ref_column, 'qry1']
        ))

    # Three separate (but equal-content) hits for ref1 and one for ref2.
    ref1_hits = [hit_against('ref1') for _ in range(3)]
    ref2_hit = hit_against('ref2')

    # The input is keyed by an arbitrary name; the expected output is keyed
    # by the ref1/ref2 values instead.
    input_dict = {'x': ref1_hits + [ref2_hit]}
    expected = {'ref1': ref1_hits, 'ref2': [ref2_hit]}

    got = self.qc._hash_nucmer_hits_by_ref(input_dict)
    self.assertEqual(expected, got)
def test_ref_coords(self):
    '''ref_coords gives Interval(0, 99) whichever way round the first two columns are'''
    # Same hit written with the first two columns as 1..100 and as 100..1.
    coords_lines = (
        '\t'.join(['1', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref']),
        '\t'.join(['100', '1', '100', '1', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref']),
    )
    expected = pyfastaq.intervals.Interval(0, 99)

    for line in coords_lines:
        hit = mummer.NucmerHit(line)
        self.assertEqual(expected, hit.ref_coords())
def test_calculate_refseq_assembly_stats(self):
    '''_calculate_refseq_assembly_stats fills refseq_assembly_stats for every ref id'''
    def make_hit(*columns):
        # A NucmerHit is built from one tab-separated coords line.
        return mummer.NucmerHit('\t'.join(columns))

    def stats(hits, bases_assembled, assembled, assembled_ok, longest_matching_contig):
        # One expected per-reference stats record.
        return {
            'hits': hits,
            'bases_assembled': bases_assembled,
            'assembled': assembled,
            'assembled_ok': assembled_ok,
            'longest_matching_contig': longest_matching_contig,
        }

    # Show the full dict diff if the comparison below fails.
    self.maxDiff = None

    # Four references of length 1000; ref4 gets no hits at all.
    self.qc.ref_ids = ['ref1', 'ref2', 'ref3', 'ref4']
    self.qc.ref_lengths = dict.fromkeys(self.qc.ref_ids, 1000)
    self.qc.assembly_vs_ref_mummer_hits = {
        'x': [
            make_hit('1', '1000', '1', '1000', '1000', '1000', '100.00', '1000', '1000', '1', '1', 'ref1', 'ctg1'),
            make_hit('1', '800', '1', '800', '800', '800', '100.00', '800', '800', '1', '1', 'ref2', 'ctg2'),
            make_hit('1', '500', '1', '500', '1000', '500', '100.00', '500', '500', '1', '1', 'ref3', 'ctg3.1'),
            make_hit('501', '1000', '1', '500', '1000', '500', '100.00', '500', '500', '1', '1', 'ref3', 'ctg3.2'),
        ]
    }

    expected = {
        'ref1': stats(1, 1000, True, True, 1000),
        'ref2': stats(1, 800, False, False, 800),
        'ref3': stats(2, 1000, True, False, 500),
        'ref4': stats(0, 0, False, False, 0),
    }

    self.qc._calculate_refseq_assembly_stats()
    self.assertEqual(expected, self.qc.refseq_assembly_stats)
def test_get_overlapping_qry_hits(self):
    '''_get_overlapping_qry_hits lists the other hits overlapping a given hit'''
    def hit_with_range(start, end):
        # Fixture hits differ only in their third and fourth columns.
        return mummer.NucmerHit('\t'.join(
            ['1', '10', start, end, '100', '100', '100.00', '100', '100', '1', '+', 'ref1', 'qry1']
        ))

    isolated_low = hit_with_range('1', '10')
    overlap_a = hit_with_range('21', '30')
    overlap_b = hit_with_range('25', '35')
    overlap_c = hit_with_range('29', '40')
    isolated_high = hit_with_range('70', '90')
    hits = [isolated_low, overlap_a, overlap_b, overlap_c, isolated_high]

    # expected[i] is the result for hits[i]; a hit is never listed as
    # overlapping itself.
    expected = [
        [],
        [overlap_b, overlap_c],
        [overlap_a, overlap_c],
        [overlap_a, overlap_b],
        [],
    ]
    self.assertEqual(len(hits), len(expected))

    for query_hit, want in zip(hits, expected):
        self.assertEqual(self.qc._get_overlapping_qry_hits(hits, query_hit), want)
def test_is_self_hit(self):
    '''Test is_self_hit'''
    # Each case is (coords line, expected is_self_hit()). Only the first
    # line, where every column is self-consistent, is expected to be True.
    tests = [
        ('\t'.join(['1', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref']), True),
        # second column differs from the fourth
        ('\t'.join(['1', '101', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref']), False),
        # first column differs from the third
        ('\t'.join(['2', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref']), False),
        # last two columns (the names) differ
        ('\t'.join(['1', '100', '1', '100', '100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref2']), False),
        # seventh column is 99.9 rather than 100.00
        ('\t'.join(['1', '100', '1', '100', '100', '100', '99.9', '1000', '1000', '1', '1', 'ref', 'ref']), False),
    ]

    for line, expected in tests:
        hit = mummer.NucmerHit(line)
        self.assertEqual(hit.is_self_hit(), expected)
def test_get_contig_hits_to_reference(self):
    '''_get_contig_hits_to_reference populates assembly_vs_ref_mummer_hits'''
    def fixture(filename):
        # Path of a test data file inside data_dir.
        return os.path.join(data_dir, filename)

    # Setup order matters: the reference FASTA path is set before
    # _set_ref_fa_data() is called.
    self.qc.ref_fasta = fixture('qc_test.reference.fa')
    self.qc._set_ref_fa_data()
    self.qc.ref_gff = fixture('qc_test.reference.cds.gff')
    self.qc.assembly_fasta = fixture('qc_test.assembly.fasta')
    self.qc._get_contig_hits_to_reference()

    # Expected hits keyed by contig name. Each row holds every column of
    # the coords line except the last, which is the contig name itself.
    rows_by_contig = {
        'A:10-1017:+': [
            ('10', '1017', '1', '1008', '1008', '1008', '100.00', '1027', '1008', '1', '+', 'A'),
            ('10', '240', '1', '231', '231', '231', '100.00', '240', '1008', '1', '+', 'A0'),
        ],
        'B:1-1778:-': [
            ('1', '1778', '1778', '1', '1778', '1778', '100.00', '1778', '1778', '1', '+', 'B'),
        ],
        'C:1-1413:+,E:1-200:-': [
            ('1', '1413', '1', '1413', '1413', '1413', '100.00', '1413', '1613', '1', '+', 'C'),
            ('1', '200', '1414', '1613', '200', '200', '100.00', '890', '1613', '1', '+', 'E'),
        ],
        'D:1-1000:+': [
            ('1', '1000', '1', '1000', '1000', '1000', '100.00', '1565', '1000', '1', '+', 'D'),
        ],
        'E:400-700:-': [
            ('400', '700', '301', '1', '301', '301', '100.00', '890', '301', '1', '+', 'E'),
        ],
        'F:1-2341:+': [
            ('1', '2341', '1', '2341', '2341', '2341', '100.00', '2341', '2341', '1', '+', 'F'),
        ],
        'F:1-2341:-': [
            ('1', '2341', '2341', '1', '2341', '2341', '100.00', '2341', '2341', '1', '+', 'F'),
        ],
    }
    expected = {
        contig: [mummer.NucmerHit('\t'.join(row + (contig,))) for row in rows]
        for contig, rows in rows_by_contig.items()
    }

    self.assertDictEqual(expected, self.qc.assembly_vs_ref_mummer_hits)
def test_to_graph_edge(self):
    '''Test to_graph_edge'''
    # Coords lines to convert. The first ten are expected to be rejected
    # (to_graph_edge() returns None); the trailing comment on each gives
    # the reason. The last eight are expected to yield an Edge.
    hits = [
        '\t'.join(['781', '981', '10', '210', '200', '200', '98', '1000', '1000', '1', '1', 'ref', 'qry']),  # %id too low
        '\t'.join(['781', '980', '10', '210', '199', '200', '98', '1000', '1000', '1', '1', 'ref', 'qry']),  # hit too short
        '\t'.join(['781', '981', '10', '209', '200', '199', '98', '1000', '1000', '1', '1', 'ref', 'qry']),  # hit too short
        '\t'.join(['1', '200', '1', '200', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref', 'qry']),  # bad orientation
        '\t'.join(['200', '1', '200', '1', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref', 'qry']),  # bad orientation
        '\t'.join(['800', '1000', '800', '1000', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref', 'qry']),  # bad orientation
        '\t'.join(['1000', '800', '1000', '800', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref', 'qry']),  # bad orientation
        '\t'.join(['300', '500', '300', '500', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref', 'qry']),  # not at ends
        '\t'.join(['1', '1000', '1', '1000', '1000', '1000', '100.00', '1000', '1000', '1', '1', 'ref', 'qry']),  # whole contigs hit
        '\t'.join(['1', '500', '1', '1000', '500', '500', '100.00', '500', '1000', '1', '1', 'ref1', 'qry1']),  # contained contig
        '\t'.join(['1', '200', '791', '991', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref2', 'qry2']),
        '\t'.join(['781', '981', '10', '210', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref3', 'qry3']),
        '\t'.join(['991', '791', '781', '981', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref4', 'qry4']),
        '\t'.join(['210', '10', '15', '215', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref5', 'qry5']),
        '\t'.join(['781', '981', '991', '791', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref6', 'qry6']),
        '\t'.join(['10', '210', '215', '5', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref7', 'qry7']),
        '\t'.join(['210', '10', '980', '780', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref8', 'qry8']),
        '\t'.join(['995', '795', '215', '15', '200', '200', '100.00', '1000', '1000', '1', '1', 'ref9', 'qry9']),
    ]

    # expected[i] is the result wanted for hits[i]. The Edge coordinates
    # are each one less than the corresponding coords-line values.
    expected = [
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        edge.Edge('qry2', 790, 990, 'ref2', 0, 199),
        edge.Edge('ref3', 780, 980, 'qry3', 9, 209),
        edge.Edge('qry4', 780, 980, 'ref4', 990, 790),
        edge.Edge('ref5', 209, 9, 'qry5', 14, 214),
        edge.Edge('ref6', 780, 980, 'qry6', 990, 790),
        edge.Edge('qry7', 214, 4, 'ref7', 9, 209),
        edge.Edge('ref8', 209, 9, 'qry8', 979, 779),
        edge.Edge('qry9', 214, 14, 'ref9', 994, 794),
    ]

    # Use a unittest assertion rather than a bare `assert`: the latter is
    # stripped under `python -O`, which would let mismatched fixture lists
    # go unnoticed.
    self.assertEqual(len(expected), len(hits))

    for line, want in zip(hits, expected):
        hit = mummer.NucmerHit(line)
        self.assertEqual(hit.to_graph_edge(), want)
def test_is_at_ends(self):
    '''_is_at_ends returns the expected HIT_* constant, with and without use_qry'''
    # Cases checked with use_qry=False: the first two columns vary while
    # the third and fourth stay fixed.
    cases_without_qry = [
        (['1', '100', '200', '300', '100', '100', '100.00', '1000', '500', '1', '1', 'ref', 'qry'], mummer.HIT_AT_START),
        (['51', '151', '200', '300', '100', '100', '100.00', '1000', '500', '1', '1', 'ref', 'qry'], mummer.HIT_NO_ENDS),
        (['900', '1000', '200', '300', '100', '100', '100.00', '1000', '500', '1', '1', 'ref', 'qry'], mummer.HIT_AT_END),
        (['1000', '900', '200', '300', '100', '100', '100.00', '1000', '500', '1', '1', 'ref', 'qry'], mummer.HIT_AT_END),
        (['850', '949', '200', '300', '100', '100', '100.00', '1000', '500', '1', '1', 'ref', 'qry'], mummer.HIT_NO_ENDS),
        (['42', '992', '200', '1152', '950', '950', '100.00', '1000', '5000', '1', '1', 'ref', 'qry'], mummer.HIT_AT_BOTH_ENDS),
    ]
    # Cases checked with use_qry=True: the third and fourth columns vary
    # instead.
    cases_with_qry = [
        (['200', '300', '1', '100', '100', '100', '100.00', '500', '1000', '1', '1', 'ref', 'qry'], mummer.HIT_AT_START),
        (['200', '300', '51', '151', '100', '100', '100.00', '500', '1000', '1', '1', 'ref', 'qry'], mummer.HIT_NO_ENDS),
        (['200', '300', '900', '1000', '100', '100', '100.00', '500', '1000', '1', '1', 'ref', 'qry'], mummer.HIT_AT_END),
        (['200', '300', '1000', '900', '100', '100', '100.00', '500', '1000', '1', '1', 'ref', 'qry'], mummer.HIT_AT_END),
        (['200', '300', '850', '949', '100', '100', '100.00', '500', '1000', '1', '1', 'ref', 'qry'], mummer.HIT_NO_ENDS),
        (['200', '300', '42', '992', '950', '950', '100.00', '500', '1000', '1', '1', 'ref', 'qry'], mummer.HIT_AT_BOTH_ENDS),
    ]

    # Run in the same order as the flat list: use_qry=False cases first.
    for use_qry, cases in ((False, cases_without_qry), (True, cases_with_qry)):
        for columns, expected in cases:
            hit = mummer.NucmerHit('\t'.join(columns))
            self.assertEqual(hit._is_at_ends(use_qry=use_qry), expected)
def test_on_same_strand(self):
    '''on_same_strand is true only when both coordinate pairs run the same way'''
    # Columns shared by every fixture line; only the first four differ.
    trailing = ['100', '100', '100.00', '1000', '1000', '1', '1', 'ref', 'ref']

    def hit_with(coords):
        # Build a NucmerHit from four leading coordinates plus the shared columns.
        return mummer.NucmerHit('\t'.join(coords + trailing))

    # Both pairs ascending, or both descending.
    same_direction = [
        ['1', '100', '1', '100'],
        ['100', '1', '100', '1'],
    ]
    # One pair ascending and the other descending.
    opposite_direction = [
        ['1', '100', '100', '1'],
        ['100', '1', '1', '100'],
    ]

    for coords in same_direction:
        self.assertTrue(hit_with(coords).on_same_strand())
    for coords in opposite_direction:
        self.assertFalse(hit_with(coords).on_same_strand())