def test_swap(self):
    '''test _swap: one swap exchanges the two ends, a second swap restores them'''
    end_a = ['1', '506', 'R', '300']
    end_b = ['2', '500', 'L', '359']

    original_link = link.Link(None, None, None, s='\t'.join(end_a + end_b))
    working = copy.copy(original_link)
    expected_swap = link.Link(None, None, None, s='\t'.join(end_b + end_a))

    working._swap()
    self.assertEqual(working, expected_swap)

    # Swapping again must bring the link back to where it started.
    working._swap()
    self.assertEqual(working, original_link)
def test_update_from_sam(self):
    '''test update_from_sam

    Feeds every record of the test BAM into a Graph and checks that no
    partial links remain and exactly one complete link, keyed on
    ('ref1', 'ref2'), was recorded.
    '''
    sam_reader = pysam.Samfile(
        os.path.join(data_dir, 'graph_test_update_from_sam.bam'), "rb")
    # Make sure the BAM handle is released whether the test passes or fails.
    self.addCleanup(sam_reader.close)

    ref_lengths = {}
    pyfastaq.tasks.lengths_from_fai(
        os.path.join(data_dir, 'graph_test_update_from_sam.ref.fa.fai'),
        ref_lengths)
    graph = scaffold_graph.Graph(ref_lengths)

    for sam in sam_reader.fetch(until_eof=True):
        graph.update_from_sam(sam, sam_reader)

    self.assertEqual(len(graph.partial_links), 0)
    self.assertEqual(len(graph.links), 1)

    key = ('ref1', 'ref2')
    # assertIn reports the actual keys on failure, unlike assertTrue(... in ...).
    self.assertIn(key, graph.links)

    expected_links = [
        link.Link(None, None, None, s='\t'.join([
            'ref1', '506', 'R', '300', 'ref2', '500', 'L', '359'
        ]))
    ]
    self.assertListEqual(graph.links[key], expected_links)
def test_merge(self):
    '''test merge

    A link that only has a position on one end cannot be merged with
    itself; merging two complementary half-links yields the full link.
    '''
    link1 = link.Link(
        None, None, None,
        '\t'.join(['1', '506', 'R', '300', '2', '500', 'L', '.']))
    link2 = link.Link(
        None, None, None,
        '\t'.join(['1', '506', 'R', '.', '2', '500', 'L', '359']))
    merged = link.Link(
        None, None, None,
        '\t'.join(['1', '506', 'R', '300', '2', '500', 'L', '359']))

    # Each self-merge gets its own assertRaises context: when both calls
    # share one block, the first exception exits the block and the second
    # call is never executed (and therefore never checked).
    with self.assertRaises(link.Error):
        link1.merge(link1)
    with self.assertRaises(link.Error):
        link2.merge(link2)

    link1.merge(link2)
    self.assertEqual(link1, merged)
def test_distance_to_contig_end(self):
    '''test _distance_to_contig_end for links with a position on one end only'''
    def half_link(first_pos, second_pos):
        fields = ['1', '506', 'R', first_pos, '2', '500', 'L', second_pos]
        return link.Link(None, None, None, '\t'.join(fields))

    first_end_only = half_link('300', '.')
    second_end_only = half_link('.', '359')

    # The end that carries a position gives a distance ...
    self.assertEqual(first_end_only._distance_to_contig_end(1), 205)
    self.assertEqual(second_end_only._distance_to_contig_end(2), 359)

    # ... and asking about the end without one is an error.
    with self.assertRaises(link.Error):
        first_end_only._distance_to_contig_end(2)
    with self.assertRaises(link.Error):
        second_end_only._distance_to_contig_end(1)
def test_lt(self):
    '''test __lt__ on pairs of links with a known expected ordering'''
    # Each case: (left fields, right fields, expected value of left < right)
    cases = [
        (['1', '10', 'R', '5', '2', '20', 'L', '3'],
         ['1', '10', 'R', '5', '2', '20', 'L', '3'],
         False),
        (['1', '10', 'R', '5', '2', '20', 'L', '3'],
         ['1', '10', 'R', '5', '2', '20', 'L', '4'],
         True),
        (['1', '10', 'R', '5', '2', '20', 'L', '3'],
         ['1', '10', 'R', '6', '2', '20', 'L', '3'],
         True),
        (['1', '10', 'R', '5', '2', '20', 'L', '3'],
         ['2', '10', 'R', '6', '2', '20', 'L', '3'],
         True),
        (['2', '10', 'R', '5', '2', '20', 'L', '3'],
         ['1', '10', 'R', '6', '2', '20', 'L', '3'],
         False),
    ]

    for left_fields, right_fields, expected in cases:
        left = link.Link(None, None, None, '\t'.join(left_fields))
        right = link.Link(None, None, None, '\t'.join(right_fields))
        self.assertEqual(left < right, expected)
def test_insert_size(self):
    '''test insert_size: needs positions on both ends, otherwise raises'''
    def build(first_pos, second_pos):
        fields = ['1', '506', 'R', first_pos, '2', '500', 'L', second_pos]
        return link.Link(None, None, None, '\t'.join(fields))

    missing_second = build('300', '.')
    missing_first = build('.', '359')
    complete = build('300', '359')

    # A link lacking either position cannot report an insert size.
    for incomplete in (missing_second, missing_first):
        with self.assertRaises(link.Error):
            incomplete.insert_size()

    self.assertEqual(complete.insert_size(), 564)
def test_distance_to_contig_ends(self):
    '''test _distance_to_contig_ends: needs positions on both ends, otherwise raises'''
    def build(first_pos, second_pos):
        fields = ['1', '506', 'R', first_pos, '2', '500', 'L', second_pos]
        return link.Link(None, None, None, '\t'.join(fields))

    missing_second = build('300', '.')
    missing_first = build('.', '359')
    complete = build('300', '359')

    # With only one position known the pair of distances is undefined.
    for incomplete in (missing_second, missing_first):
        with self.assertRaises(link.Error):
            incomplete._distance_to_contig_ends()

    self.assertEqual(complete._distance_to_contig_ends(), (205, 359))
def test_init_no_links(self):
    '''test link __init__ no links made

    Every record in the test BAM must make the Link constructor raise
    link.Error.
    '''
    sam_reader = pysam.Samfile(
        os.path.join(data_dir, 'link_test_init.reads.no_link.bam'), "rb")
    # Make sure the BAM handle is released whether the test passes or fails.
    self.addCleanup(sam_reader.close)

    ref_lengths = {}
    pyfastaq.tasks.lengths_from_fai(
        os.path.join(data_dir, 'link_test_init.ref.fa.fai'), ref_lengths)

    for sam in sam_reader.fetch(until_eof=True):
        with self.assertRaises(link.Error):
            # Only the exception matters; the result is never used.
            link.Link(sam, sam_reader, ref_lengths)
def test_make_graph(self):
    '''test _make_graph: partial links are rejected, links are counted per orientation'''
    def ref1_ref2_link(first_dir, second_dir):
        fields = ['ref1', '100', first_dir, '50', 'ref2', '200', second_dir, '10']
        return link.Link(None, None, None, '\t'.join(fields))

    contig_pair = ('ref1', 'ref2')
    ref_lengths = {'ref1': 100, 'ref2': 200, 'ref3': 300}
    g = scaffold_graph.Graph(ref_lengths)

    # Any leftover partial link must make graph construction fail.
    g.partial_links = {42: 42}
    with self.assertRaises(scaffold_graph.Error):
        g._make_graph(1000)
    g.partial_links = {}

    g.links[contig_pair] = [ref1_ref2_link('R', 'L')]

    # With an argument of 10 the single link is not counted ...
    g._make_graph(10)
    self.assertEqual(len(g.contig_links), 0)

    # ... with 1000 it is.
    g._make_graph(1000)
    self.assertEqual(len(g.contig_links), 1)
    expected_contig_links = {contig_pair: {'RL': 1}}
    self.assertDictEqual(g.contig_links, expected_contig_links)

    # Add a second RL link and one LR link; counts are kept per orientation.
    g.links[contig_pair].append(ref1_ref2_link('R', 'L'))
    g.links[contig_pair].append(ref1_ref2_link('L', 'R'))
    g._make_graph(1000)
    expected_contig_links = {contig_pair: {'RL': 2, 'LR': 1}}
    self.assertDictEqual(g.contig_links, expected_contig_links)
def test_write_all_links_to_file(self):
    '''test write_all_links_to_file

    Writes the links of a small hand-built graph to a temporary file and
    compares it byte-for-byte with the expected output in data_dir.
    '''
    ref_lengths = {'ref1': 100, 'ref2': 200, 'ref3': 300}
    g = scaffold_graph.Graph(ref_lengths)
    g.links[('ref1', 'ref2')] = [
        link.Link(
            None, None, None,
            '\t'.join(['ref1', '100', 'R', '50', 'ref2', '200', 'L', '10'])),
        link.Link(
            None, None, None,
            '\t'.join(['ref1', '100', 'R', '50', 'ref2', '200', 'L', '10'])),
        link.Link(
            None, None, None,
            '\t'.join(['ref1', '100', 'L', '50', 'ref2', '200', 'R', '10'])),
    ]
    g.links[('ref3', 'ref4')] = [
        link.Link(
            None, None, None,
            '\t'.join(['ref3', '100', 'R', '42', 'ref4', '200', 'L', '42'])),
    ]

    tmp_file = 'tmp.contig_links'
    try:
        g.write_all_links_to_file(tmp_file)
        self.assertTrue(
            filecmp.cmp(
                os.path.join(data_dir, 'graph_test_write_all_links_to_file.out'),
                tmp_file,
                shallow=False))
    finally:
        # Remove the temp file even when the comparison fails; the existence
        # check avoids masking an earlier error if the file was never written.
        if os.path.exists(tmp_file):
            os.unlink(tmp_file)
def test_sort(self):
    '''test sort

    After sort() a link whose reference names are already in order is
    unchanged, and one whose names are out of order has its ends exchanged.
    '''
    links = [
        link.Link(None, None, None, s='\t'.join([
            'ref1', '500', 'L', '359', 'ref2', '506', 'R', '300'
        ])),
        link.Link(None, None, None, s='\t'.join([
            'ref2', '500', 'L', '359', 'ref1', '506', 'R', '300'
        ])),
    ]
    expected = [
        link.Link(None, None, None, s='\t'.join([
            'ref1', '500', 'L', '359', 'ref2', '506', 'R', '300'
        ])),
        link.Link(None, None, None, s='\t'.join([
            'ref1', '506', 'R', '300', 'ref2', '500', 'L', '359'
        ])),
    ]

    # Use the TestCase assertion rather than a bare assert, which is
    # stripped when Python runs with -O.
    self.assertEqual(len(links), len(expected))
    for unsorted_link, expected_link in zip(links, expected):
        unsorted_link.sort()
        self.assertEqual(unsorted_link, expected_link)
def update_from_sam(self, sam, sam_reader):
    '''Updates graph info from a pysam.AlignedSegment object.

    Records that are unmapped, have an unmapped mate, or share a reference
    id with their mate are ignored. Otherwise a Link is built from the
    record. If no link is stored under the record's query name yet, the new
    link is parked in self.partial_links. If one is, the two are merged, the
    partial entry is removed, and the sorted link is appended to self.links
    under the sorted pair of its reference names.
    '''
    if sam.is_unmapped or sam.mate_is_unmapped:
        return
    if sam.reference_id == sam.next_reference_id:
        return

    new_link = link.Link(sam, sam_reader, self.ref_lengths)
    read_name = sam.query_name

    if read_name not in self.partial_links:
        # Nothing stored under this name yet: wait for the matching record.
        self.partial_links[read_name] = new_link
        return

    # Merge before deleting so a failed merge leaves the partial link in place.
    new_link.merge(self.partial_links[read_name])
    del self.partial_links[read_name]

    name_pair = (new_link.refnames[0], new_link.refnames[1])
    key = tuple(sorted(name_pair))
    bucket = self.links.setdefault(key, [])
    new_link.sort()
    bucket.append(new_link)
def test_init_with_link(self):
    '''test link __init__ link made

    Builds a Link from every record in the test BAM and compares the
    string form of each with the expected tab-separated line.
    '''
    sam_reader = pysam.Samfile(
        os.path.join(data_dir, 'link_test_init.reads.make_link.bam'), "rb")
    # Make sure the BAM handle is released whether the test passes or fails.
    self.addCleanup(sam_reader.close)

    ref_lengths = {}
    pyfastaq.tasks.lengths_from_fai(
        os.path.join(data_dir, 'link_test_init.ref.fa.fai'), ref_lengths)

    links_from_bam = [
        link.Link(sam, sam_reader, ref_lengths)
        for sam in sam_reader.fetch(until_eof=True)
    ]

    expected = [
        '\t'.join(['1', '506', 'R', '300', '2', '500', 'L', '.']),
        '\t'.join(['1', '506', 'R', '.', '2', '500', 'L', '359']),
    ]

    # Check lengths first: zip would silently ignore surplus items.
    self.assertEqual(len(expected), len(links_from_bam))
    for expected_line, bam_link in zip(expected, links_from_bam):
        self.assertEqual(expected_line, str(bam_link))
def test_parse(self):
    '''test parse: soft-clip counts, unmapped-mate counts and the single scaffold link'''
    ref_fasta = os.path.join(data_dir, 'bam_parse_test_parse.ref.fa')
    bam = os.path.join(data_dir, 'bam_parse_test_parse.bam')

    ref_seqs = {}
    pyfastaq.tasks.file_to_dict(ref_fasta, ref_seqs)

    parser = bam_parse.Parser(bam, ref_seqs)
    parser.parse()

    expected_soft_clipped = {'ref1': {2: [1, 0], 58: [0, 1]}}
    expected_unmapped_mates = {'ref1': {2: 1}}
    expected_link = link.Link(
        None, None, None,
        s='\t'.join(['ref1', '500', 'R', '240', 'ref2', '500', 'L', '299']))
    expected_link_keys = [('ref1', 'ref2')]

    self.assertEqual(expected_soft_clipped, parser.soft_clipped)
    self.assertEqual(expected_unmapped_mates, parser.unmapped_mates)

    # Exactly one contig pair, holding exactly one link, is expected.
    found_links = parser.scaff_graph.links
    only_key = expected_link_keys[0]
    self.assertEqual(list(found_links.keys()), expected_link_keys)
    self.assertEqual(len(found_links[only_key]), 1)
    self.assertEqual(found_links[only_key][0], expected_link)