def test_keep_three_cycle_two_equal_weights(self):
    """A three-cycle of contig connections that all carry the same weight
    must be left untouched by refinement."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    gene_edges = [
        ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),
        ('gene4', 'gene5'), ('gene6', 'gene7'), ('gene7', 'gene8'),
        ('gene8', 'gene1'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [
        [('Contig1', 'Contig2'), 1, ['gene3', 'gene4']],
        [('Contig2', 'Contig3'), 1, ['gene5', 'gene6']],
        [('Contig1', 'Contig3'), 1, ['gene8', 'gene1']],
    ]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    # The same graph is passed as both the filtered and the unfiltered graph.
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})
    refiner.contigs = {
        'Contig1': {'gene1': None, 'gene2': None, 'gene3': None},
        'Contig2': {'gene4': None, 'gene5': None},
        'Contig3': {'gene6': None, 'gene7': None, 'gene8': None},
    }
    refiner.genes = {
        'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
        'gene4': 'Contig2', 'gene5': 'Contig2', 'gene6': 'Contig3',
        'gene7': 'Contig3', 'gene8': 'Contig3',
    }
    refined = refiner.refine_contig_neighbours()
    self.assertEqual(sorted(refined), sorted(neighbours))
def test_one_gene_on_contig(self):
    """ends_of_contigs() for a single Contig2/Contig4 connection through
    gene5 and gene10 returns the expected end pairs for both contigs."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_10_blast_hits_file'
    graph = nx.Graph()
    graph.add_edges_from([('gene4', 'gene5'), ('gene5', 'gene10'), ('gene10', 'geneA')])
    neighbours = [[('Contig2', 'Contig4'), 1, ['gene5', 'gene10']]]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    sequences = {'Contig2': ['', '', 800], 'Contig4': ['', '', 50]}
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)
    expected = {'Contig4': {'Contig2': [5, 50]}, 'Contig2': {'Contig4': [301, 2]}}
    self.assertDictEqual(refiner.ends_of_contigs(), expected)
def produce_ordered_contig_graph(self):
    """Build and return the ordered contig graph.

    Expands all contigs with ContigSearching, feeds the resulting
    neighbouring-contig list to RefineContigNeighbours, stores the result
    of ``ends_of_contigs()`` on ``self.contig_ends``, creates contig
    subgraphs from both the refined and the unrefined neighbour lists, and
    finally hands the unrefined subgraph to IterateJoiningContigComponents
    and calls ``iterate_joining`` with the refined one.

    Returns:
        The value returned by ``iterate_joining`` for the refined subgraph.
    """
    searcher = ContigSearching(self.gene_detector, self.filtered_graph)
    searcher.expand_all_contigs()

    refiner = RefineContigNeighbours(
        searcher.neighbouring_contigs,
        self.filtered_graph,
        self.unfiltered_graph,
        self.filtered_blast_hits_file,
        self.gene_detector,
        self.sequences,
    )
    # Return value is ignored here; the refined list is read back from the
    # refined_neighbouring_contigs attribute further down.
    refiner.refine_contig_neighbours()
    self.contig_ends = refiner.ends_of_contigs()

    refined_builder = ContigGraph(refiner.refined_neighbouring_contigs)
    unrefined_builder = ContigGraph(searcher.neighbouring_contigs)
    refined_subgraph = refined_builder.create_contig_subgraph()
    unrefined_subgraph = unrefined_builder.create_contig_subgraph()

    joiner = IterateJoiningContigComponents(unrefined_subgraph)
    return joiner.iterate_joining(refined_subgraph)
def test_finding_contig_ends_gene_degree_one(self):
    """ends_of_contigs() for one Contig3/Contig2 connection via gene7 and
    gene4 yields the expected end pairs for each contig."""
    assembly = 'madansi/tests/data/assembly_4_sequences.fa'
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene9', 'gene8'),
        ('gene8', 'gene7'),
        ('gene7', 'gene6'),
        ('gene7', 'gene4'),
        ('gene4', 'gene5'),
    ])
    neighbours = [[('Contig3', 'Contig2'), 1, ['gene7', 'gene4']]]
    detector = GeneDetector(assembly, blast_hits)
    sequences = {'Contig2': ['', '', 800], 'Contig3': ['', '', 2000]}
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, sequences)

    expected_ends = {
        'Contig3': {'Contig2': [250, 700]},
        'Contig2': {'Contig3': [2, 301]},
    }
    self.assertDictEqual(refiner.ends_of_contigs(), expected_ends)
def test_finding_contig_ends_gene_degree_one(self):
    """Check the mapping returned by ends_of_contigs() when Contig3 and
    Contig2 are connected through gene7 and gene4."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    gene_edges = [
        ('gene9', 'gene8'), ('gene8', 'gene7'), ('gene7', 'gene6'),
        ('gene7', 'gene4'), ('gene4', 'gene5'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [[('Contig3', 'Contig2'), 1, ['gene7', 'gene4']]]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(
        neighbours,
        graph,
        graph,
        blast_hits,
        detector,
        {'Contig2': ['', '', 800], 'Contig3': ['', '', 2000]},
    )
    actual = refiner.ends_of_contigs()
    self.assertDictEqual(
        actual,
        {'Contig3': {'Contig2': [250, 700]}, 'Contig2': {'Contig3': [2, 301]}},
    )
def test_most_occurent_contig_not_in_neighbours(self):
    """refine_contig_neighbours() returns no connections for this
    Contig1/Contig2 pair joined through gene2 and gene4."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    gene_edges = [
        ('gene1', 'gene2'), ('gene2', 'gene4'), ('gene4', 'gene5'),
        ('gene2', 'gene6'), ('gene6', 'gene7'), ('gene7', 'gene8'),
        ('gene4', 'gene8'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [[('Contig1', 'Contig2'), 1, ['gene2', 'gene4']]]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})
    refined = refiner.refine_contig_neighbours()
    self.assertEqual(sorted(refined), [])
def test_one_gene_on_contig(self):
    """Check the mapping returned by ends_of_contigs() for a Contig2/Contig4
    connection through gene5 and gene10."""
    assembly = 'madansi/tests/data/assembly_4_sequences.fa'
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_10_blast_hits_file'

    gene_edges = [('gene4', 'gene5'), ('gene5', 'gene10'), ('gene10', 'geneA')]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)

    neighbours = [[('Contig2', 'Contig4'), 1, ['gene5', 'gene10']]]
    detector = GeneDetector(assembly, blast_hits)
    refiner = RefineContigNeighbours(
        neighbours,
        graph,
        graph,
        blast_hits,
        detector,
        {'Contig2': ['', '', 800], 'Contig4': ['', '', 50]},
    )

    actual = refiner.ends_of_contigs()
    self.assertDictEqual(
        actual,
        {'Contig4': {'Contig2': [5, 50]}, 'Contig2': {'Contig4': [301, 2]}},
    )
def test_finding_contig_ends_3_cycle_both_different_half(self):
    """ends_of_contigs() for a Contig2/Contig3 connection through geneA,
    where geneA, gene6 and gene8 form a three-cycle in the gene graph."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    gene_edges = [
        ('gene4', 'gene5'), ('gene5', 'geneA'), ('geneA', 'gene8'),
        ('geneA', 'gene6'), ('gene6', 'gene8'), ('gene6', 'gene7'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [[('Contig2', 'Contig3'), 1, ['geneA']]]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    sequences = {'Contig2': ['', '', 800], 'Contig3': ['', '', 1200]}
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)
    expected = {'Contig3': {'Contig2': [250, 1200]}, 'Contig2': {'Contig3': [301, 2]}}
    self.assertDictEqual(refiner.ends_of_contigs(), expected)
def test_contig_appearances(self):
    """find_contig_appearances() on the single Contig1/Contig2 connection
    returns, per contig, a count and a mapping from integer keys to gene
    lists."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    graph = nx.Graph()
    graph.add_edges_from([
        ('gene1', 'gene2'),
        ('gene2', 'gene3'),
        ('gene3', 'gene4'),
        ('gene4', 'gene5'),
    ])
    neighbours = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene4']]]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})
    appearances = refiner.find_contig_appearances(neighbours[0])
    expected = {
        'Contig1': [3, {0: ['gene3'], 1: ['gene2'], 2: ['gene1']}],
        'Contig2': [2, {0: ['gene4'], 1: ['gene5']}],
    }
    self.assertDictEqual(appearances, expected)
def test_loop_of_genes_between_contigs(self):
    """With three pairwise connections, refinement is expected to drop the
    Contig1/Contig2 entry and keep the two entries involving Contig3."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    gene_edges = [
        ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'geneA'),
        ('geneA', 'geneB'), ('geneB', 'gene4'), ('gene4', 'gene5'),
        ('gene3', 'gene6'), ('gene6', 'gene7'), ('gene7', 'gene8'),
        ('gene8', 'gene9'), ('gene9', 'geneB'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [
        [('Contig1', 'Contig2'), 2, ['geneA', 'geneB']],
        [('Contig2', 'Contig3'), 1, ['geneB']],
        [('Contig1', 'Contig3'), 1, ['geneA']],
    ]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})
    kept = [
        [('Contig2', 'Contig3'), 1, ['geneB']],
        [('Contig1', 'Contig3'), 1, ['geneA']],
    ]
    refined = refiner.refine_contig_neighbours()
    self.assertEqual(sorted(refined), sorted(kept))
def test_finding_contig_ends_multiple_genes_same_side_of_closer_gene(self):
    """Check contig_ends after ends_of_contigs() for a Contig1/Contig2
    connection through gene3 and gene4.

    Two outcomes are accepted; they differ only in the second value
    recorded for Contig1 (3 or 490).
    """
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    filtered_graph = nx.Graph()
    filtered_graph.add_edges_from([
        ('gene1', 'gene2'), ('gene3', 'gene2'), ('gene3', 'gene4'), ('gene4', 'gene5'),
    ])
    neighbouring_contigs = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene4']]]
    gene_detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refine_contig_neighbours_object = RefineContigNeighbours(
        neighbouring_contigs,
        filtered_graph,
        filtered_graph,
        blast_hits,
        gene_detector,
        {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800]},
    )
    refine_contig_neighbours_object.ends_of_contigs()
    acceptable_ends = [
        {'Contig1': {'Contig2': [980, 3]}, 'Contig2': {'Contig1': [2, 301]}},
        {'Contig1': {'Contig2': [980, 490]}, 'Contig2': {'Contig1': [2, 301]}},
    ]
    # assertIn shows the actual contig_ends on failure, whereas assertTrue on
    # an `==`/`or` expression only reports "False is not true".
    self.assertIn(refine_contig_neighbours_object.contig_ends, acceptable_ends)
def test_add_to_contig_appearances(self):
    """add_to_contig_appearance() for 'gene1' on several starting
    dictionaries: empty, an existing contig with no genes, a new key, and a
    key that already holds a gene."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    graph = nx.Graph()
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    neighbours = []
    sequences = {
        'Contig1': ['', '', 1000],
        'Contig2': ['', '', 800],
        'Contig3': ['', '', 2000],
    }
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)

    # Each case: (starting appearances, integer key argument, expected result).
    cases = [
        ({}, 0,
         {'Contig1': [1, {0: ['gene1']}]}),
        ({'Contig1': [0, {}]}, 1,
         {'Contig1': [1, {1: ['gene1']}]}),
        ({'Contig1': [1, {0: ['gene2']}]}, 1,
         {'Contig1': [2, {0: ['gene2'], 1: ['gene1']}]}),
        ({'Contig1': [2, {1: ['gene2'], 2: ['gene3']}]}, 2,
         {'Contig1': [3, {1: ['gene2'], 2: ['gene1', 'gene3']}]}),
    ]
    for starting, key, expected in cases:
        self.assertDictEqual(
            refiner.add_to_contig_appearance('gene1', starting, key), expected)
def test_keep_all_connections_empty_list(self):
    """Refining an empty graph with an empty neighbouring-contig list leaves
    the refined list empty."""
    empty_hits = 'madansi/tests/data/empty_file'
    graph = nx.Graph()
    neighbours = []
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', empty_hits)
    refiner = RefineContigNeighbours(neighbours, graph, graph, empty_hits, detector, {})
    refiner.genes = {}
    refiner.refine_contig_neighbours()
    refined = refiner.refined_neighbouring_contigs
    self.assertEqual(sorted(refined), sorted(neighbours))
def test_single_gene_from_a_contig(self):
    """Two contigs are present and one contributes a single gene here while
    its other genes sit elsewhere in the graph; no connection is expected
    to survive refinement."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_5_blast_hits_file'
    graph = nx.Graph()
    graph.add_edges_from([('gene1', 'gene2'), ('gene2', 'gene3'), ('gene4', 'gene5')])
    neighbours = [[('Contig1', 'Contig2'), 1, ['gene2', 'gene3']]]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})
    refiner.genes = {
        'gene1': 'Contig1',
        'gene2': 'Contig1',
        'gene3': 'Contig2',
        'gene4': 'Contig2',
        'gene5': 'Contig2',
    }
    refiner.refine_contig_neighbours()
    refined = refiner.refined_neighbouring_contigs
    self.assertEqual(sorted(refined), [])
def test_orientation_of_contigs(self):
    """After ends_of_contigs(), contig_ends holds the expected end pairs for
    a chain of three contigs (Contig1 - Contig2 - Contig3)."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    gene_edges = [
        ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),
        ('gene4', 'gene5'), ('gene5', 'gene6'), ('gene6', 'gene7'),
        ('gene7', 'gene8'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [
        [('Contig1', 'Contig2'), 1, ['gene3', 'gene4']],
        [('Contig2', 'Contig3'), 1, ['gene5', 'gene6']],
    ]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    sequences = {
        'Contig1': ['', '', 1000],
        'Contig2': ['', '', 800],
        'Contig3': ['', '', 2000],
    }
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)
    refiner.genes = {
        'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
        'gene4': 'Contig2', 'gene5': 'Contig2', 'gene6': 'Contig3',
        'gene7': 'Contig3', 'gene8': 'Contig3',
    }
    refiner.ends_of_contigs()
    expected_ends = {
        'Contig1': {'Contig2': [980, 490]},
        'Contig2': {'Contig1': [2, 301], 'Contig3': [301, 2]},
        'Contig3': {'Contig2': [1, 250]},
    }
    self.assertDictEqual(refiner.contig_ends, expected_ends)
def test_keep_one_connection(self):
    """A lone connection in the neighbouring-contig list survives
    refinement."""
    empty_hits = 'madansi/tests/data/empty_file'
    gene_edges = [
        ('gene1', 'gene2'), ('gene2', 'gene5'), ('gene5', 'gene6'),
        ('gene6', 'gene3'), ('gene3', 'gene4'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [[('Contig1', 'Contig2'), 2, ['gene5', 'gene6']]]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', empty_hits)
    refiner = RefineContigNeighbours(neighbours, graph, graph, empty_hits, detector, {})
    refiner.genes = {
        'gene1': 'Contig1',
        'gene2': 'Contig1',
        'gene3': 'Contig2',
        'gene4': 'Contig2',
    }
    refiner.refine_contig_neighbours()
    refined = refiner.refined_neighbouring_contigs
    self.assertEqual(sorted(refined), sorted(neighbours))
def test_finds_orientation_two_contigs(self):
    """After ends_of_contigs(), contig_ends holds the expected end pairs for
    Contig1 and Contig2 connected through geneA and geneB."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    gene_edges = [
        ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'geneA'),
        ('geneA', 'geneB'), ('geneB', 'gene4'), ('gene4', 'gene5'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [[('Contig1', 'Contig2'), 2, ['geneA', 'geneB']]]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    sequences = {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800]}
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)
    refiner.contigs = {
        'Contig1': {'gene1': None, 'gene2': None, 'gene3': None},
        'Contig2': {'gene4': None, 'gene5': None},
    }
    refiner.genes = {
        'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
        'gene4': 'Contig2', 'gene5': 'Contig2',
    }
    wanted = {
        'Contig1': {'Contig2': [980, 490]},
        'Contig2': {'Contig1': [2, 301]},
    }
    refiner.ends_of_contigs()
    self.assertDictEqual(refiner.contig_ends, wanted)
def test_contig_joins_in_middle(self):
    """An intersection where the end of one contig is closest to the middle
    of another: refinement is expected to return no connections."""
    empty_hits = 'madansi/tests/data/empty_file'
    gene_edges = [
        ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),
        ('gene4', 'gene5'), ('gene5', 'gene6'), ('gene3', 'gene7'),
        ('gene7', 'gene8'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene7']]]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', empty_hits)
    refiner = RefineContigNeighbours(neighbours, graph, graph, empty_hits, detector, {})
    refiner.genes = {
        'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
        'gene4': 'Contig1', 'gene5': 'Contig1', 'gene6': 'Contig1',
        'gene7': 'Contig2', 'gene8': 'Contig2',
    }
    refined = refiner.refine_contig_neighbours()
    self.assertEqual(sorted(refined), [])
def test_three_contigs_together(self):
    """Three contigs all lie within a small distance of the intersection
    point (gene7); no connection is expected to remain after refinement."""
    empty_hits = 'madansi/tests/data/empty_file'
    gene_edges = [
        ('gene1', 'gene2'), ('gene3', 'gene4'), ('gene5', 'gene6'),
        ('gene7', 'gene2'), ('gene7', 'gene3'), ('gene7', 'gene5'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [
        [('Contig1', 'Contig2'), 1, ['gene7']],
        [('Contig2', 'Contig3'), 1, ['gene7']],
        [('Contig1', 'Contig3'), 1, ['gene7']],
    ]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', empty_hits)
    refiner = RefineContigNeighbours(neighbours, graph, graph, empty_hits, detector, {})
    refiner.genes = {
        'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig2',
        'gene4': 'Contig2', 'gene5': 'Contig3', 'gene6': 'Contig3',
    }
    refiner.refine_contig_neighbours()
    refined = refiner.refined_neighbouring_contigs
    self.assertEqual(sorted(refined), [])
def produce_ordered_contig_graph(self):
    """Return the ordered contig graph for this object's inputs.

    Steps, in order: expand all contigs via ContigSearching; refine the
    neighbouring contigs via RefineContigNeighbours; record the result of
    ``ends_of_contigs()`` in ``self.contig_ends``; create contig subgraphs
    from the refined and unrefined neighbour lists; call
    ``iterate_joining`` on an IterateJoiningContigComponents built from the
    unrefined subgraph, passing the refined subgraph.
    """
    search = ContigSearching(self.gene_detector, self.filtered_graph)
    search.expand_all_contigs()
    unrefined_neighbours = search.neighbouring_contigs

    refinement = RefineContigNeighbours(
        unrefined_neighbours,
        self.filtered_graph,
        self.unfiltered_graph,
        self.filtered_blast_hits_file,
        self.gene_detector,
        self.sequences,
    )
    # Run the refinement first; its output is picked up from the
    # refined_neighbouring_contigs attribute after the contig ends are stored.
    refinement.refine_contig_neighbours()
    self.contig_ends = refinement.ends_of_contigs()
    refined_neighbours = refinement.refined_neighbouring_contigs

    refined_contig_graph = ContigGraph(refined_neighbours)
    unrefined_contig_graph = ContigGraph(search.neighbouring_contigs)
    refined = refined_contig_graph.create_contig_subgraph()
    unrefined = unrefined_contig_graph.create_contig_subgraph()

    return IterateJoiningContigComponents(unrefined).iterate_joining(refined)
def test_keep_two_connections(self):
    """Both connections in the neighbouring-contig list are preserved by
    refinement."""
    blast_hits = 'madansi/tests/data/blast_hits_13'
    # Linear chain gene1 - gene2 - ... - gene13.
    gene_edges = [('gene%d' % i, 'gene%d' % (i + 1)) for i in range(1, 13)]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [
        [('Contig1', 'Contig2'), 1, ['gene4']],
        [('Contig2', 'Contig3'), 2, ['gene8', 'gene9']],
    ]
    detector = GeneDetector('madansi/tests/data/assembly_7_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})
    refiner.genes = {
        'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
        'gene5': 'Contig2', 'gene6': 'Contig2', 'gene7': 'Contig2',
        'gene10': 'Contig3', 'gene11': 'Contig3', 'gene13': 'Contig3',
        'gene12': 'Contig3',
    }
    refiner.refine_contig_neighbours()
    refined = refiner.refined_neighbouring_contigs
    self.assertEqual(sorted(refined), sorted(neighbours))
def test_finding_contig_ends_multiple_genes_same_side_of_closer_gene(self):
    """Check contig_ends after ends_of_contigs() for a Contig1/Contig2
    connection through gene3 and gene4.

    Either of two results is accepted; they differ only in the second
    value stored for Contig1 (3 or 490).
    """
    filtered_graph = nx.Graph()
    filtered_graph.add_edges_from([('gene1', 'gene2'), ('gene3', 'gene2'),
                                   ('gene3', 'gene4'), ('gene4', 'gene5')])
    neighbouring_contigs = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene4']]]
    gene_detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa',
        'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file')
    refine_contig_neighbours_object = RefineContigNeighbours(
        neighbouring_contigs, filtered_graph, filtered_graph,
        'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file',
        gene_detector, {
            'Contig1': ['', '', 1000],
            'Contig2': ['', '', 800]
        })
    refine_contig_neighbours_object.ends_of_contigs()
    # assertIn reports the actual contig_ends when the check fails; the
    # previous assertTrue(a == x or a == y) only reported a bare False.
    self.assertIn(
        refine_contig_neighbours_object.contig_ends,
        [
            {'Contig1': {'Contig2': [980, 3]}, 'Contig2': {'Contig1': [2, 301]}},
            {'Contig1': {'Contig2': [980, 490]}, 'Contig2': {'Contig1': [2, 301]}},
        ])
def test_finds_orientation_two_contigs(self):
    """Check contig_ends after ends_of_contigs() for two contigs joined
    through geneA and geneB."""
    assembly = 'madansi/tests/data/assembly_4_sequences.fa'
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene1', 'gene2'),
        ('gene2', 'gene3'),
        ('gene3', 'geneA'),
        ('geneA', 'geneB'),
        ('geneB', 'gene4'),
        ('gene4', 'gene5'),
    ])
    neighbours = [[('Contig1', 'Contig2'), 2, ['geneA', 'geneB']]]
    detector = GeneDetector(assembly, blast_hits)
    refiner = RefineContigNeighbours(
        neighbours,
        graph,
        graph,
        blast_hits,
        detector,
        {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800]},
    )
    refiner.contigs = {
        'Contig1': {'gene1': None, 'gene2': None, 'gene3': None},
        'Contig2': {'gene4': None, 'gene5': None},
    }
    refiner.genes = {
        'gene1': 'Contig1',
        'gene2': 'Contig1',
        'gene3': 'Contig1',
        'gene4': 'Contig2',
        'gene5': 'Contig2',
    }
    expected_ends = {
        'Contig1': {'Contig2': [980, 490]},
        'Contig2': {'Contig1': [2, 301]},
    }
    refiner.ends_of_contigs()
    self.assertDictEqual(refiner.contig_ends, expected_ends)
def test_short_contig_in_middle(self):
    """A contig with only two genes sits between two longer ones; the
    connection that needs more iterations (Contig1/Contig3) should be
    removed by refinement."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    gene_edges = [
        ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),
        ('gene4', 'gene5'), ('gene5', 'gene6'), ('gene6', 'gene7'),
        ('gene7', 'gene8'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [
        [('Contig1', 'Contig2'), 1, ['gene3', 'gene4']],
        [('Contig2', 'Contig3'), 1, ['gene5', 'gene6']],
        [('Contig1', 'Contig3'), 2, ['gene4', 'gene5']],
    ]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, {})
    refiner.contigs = {
        'Contig1': {'gene1': None, 'gene2': None, 'gene3': None},
        'Contig2': {'gene4': None, 'gene5': None},
        'Contig3': {'gene6': None, 'gene7': None, 'gene8': None},
    }
    refiner.genes = {
        'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
        'gene4': 'Contig2', 'gene5': 'Contig2', 'gene6': 'Contig3',
        'gene7': 'Contig3', 'gene8': 'Contig3',
    }
    refined = refiner.refine_contig_neighbours()
    kept = [
        [('Contig1', 'Contig2'), 1, ['gene3', 'gene4']],
        [('Contig2', 'Contig3'), 1, ['gene5', 'gene6']],
    ]
    self.assertEqual(sorted(refined), sorted(kept))
def test_orientation_further_separation(self):
    """Check contig_ends after ends_of_contigs() when the three contigs are
    separated by intermediate genes (geneA, geneB, geneC) that map to no
    contig in the genes dictionary."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    gene_edges = [
        ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'geneA'),
        ('geneA', 'geneB'), ('geneB', 'gene4'), ('gene4', 'gene5'),
        ('gene5', 'geneC'), ('geneC', 'gene6'), ('gene6', 'gene7'),
        ('gene7', 'gene8'),
    ]
    graph = nx.Graph()
    graph.add_edges_from(gene_edges)
    neighbours = [
        [('Contig1', 'Contig2'), 1, ['geneA', 'geneB']],
        [('Contig2', 'Contig3'), 1, ['geneC']],
    ]
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    sequences = {
        'Contig1': ['', '', 1000],
        'Contig2': ['', '', 800],
        'Contig3': ['', '', 2000],
    }
    refiner = RefineContigNeighbours(neighbours, graph, graph, blast_hits, detector, sequences)
    refiner.genes = {
        'gene1': 'Contig1', 'gene2': 'Contig1', 'gene3': 'Contig1',
        'gene4': 'Contig2', 'gene5': 'Contig2', 'gene6': 'Contig3',
        'gene7': 'Contig3', 'gene8': 'Contig3',
        'geneA': None, 'geneB': None, 'geneC': None,
    }
    refiner.ends_of_contigs()
    expected_ends = {
        'Contig1': {'Contig2': [980, 490]},
        'Contig2': {'Contig1': [2, 301], 'Contig3': [301, 2]},
        'Contig3': {'Contig2': [1, 250]},
    }
    self.assertDictEqual(refiner.contig_ends, expected_ends)
def test_finding_contig_ends_3_cycle_both_different_half(self):
    """Check the mapping returned by ends_of_contigs() for Contig2 and
    Contig3 joined through geneA, with geneA, gene6 and gene8 forming a
    three-cycle."""
    assembly = 'madansi/tests/data/assembly_4_sequences.fa'
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene4', 'gene5'),
        ('gene5', 'geneA'),
        ('geneA', 'gene8'),
        ('geneA', 'gene6'),
        ('gene6', 'gene8'),
        ('gene6', 'gene7'),
    ])
    neighbours = [[('Contig2', 'Contig3'), 1, ['geneA']]]
    detector = GeneDetector(assembly, blast_hits)
    refiner = RefineContigNeighbours(
        neighbours,
        graph,
        graph,
        blast_hits,
        detector,
        {'Contig2': ['', '', 800], 'Contig3': ['', '', 1200]},
    )

    actual = refiner.ends_of_contigs()
    self.assertDictEqual(
        actual,
        {'Contig3': {'Contig2': [250, 1200]}, 'Contig2': {'Contig3': [301, 2]}},
    )
def test_add_to_contig_appearances(self):
    """Exercise add_to_contig_appearance() with 'gene1' against four
    different starting dictionaries and integer keys."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
    graph = nx.Graph()
    detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    neighbours = []
    refiner = RefineContigNeighbours(
        neighbours,
        graph,
        graph,
        blast_hits,
        detector,
        {
            'Contig1': ['', '', 1000],
            'Contig2': ['', '', 800],
            'Contig3': ['', '', 2000],
        },
    )

    # Empty starting dictionary: the contig entry is created.
    result = refiner.add_to_contig_appearance('gene1', {}, 0)
    self.assertDictEqual(result, {'Contig1': [1, {0: ['gene1']}]})

    # Contig entry exists but holds no genes yet.
    result = refiner.add_to_contig_appearance('gene1', {'Contig1': [0, {}]}, 1)
    self.assertDictEqual(result, {'Contig1': [1, {1: ['gene1']}]})

    # Gene is added under a key not yet present for the contig.
    result = refiner.add_to_contig_appearance(
        'gene1', {'Contig1': [1, {0: ['gene2']}]}, 1)
    self.assertDictEqual(result, {'Contig1': [2, {0: ['gene2'], 1: ['gene1']}]})

    # Gene is added under a key that already holds another gene.
    result = refiner.add_to_contig_appearance(
        'gene1', {'Contig1': [2, {1: ['gene2'], 2: ['gene3']}]}, 2)
    self.assertDictEqual(
        result, {'Contig1': [3, {1: ['gene2'], 2: ['gene1', 'gene3']}]})