def test_contigs_to_genes_four_hits(self):
    """Check the contig -> genes mapping built from the four_blast_hits fixture."""
    detector = GeneDetector(
        'madansi/tests/data/assembly.fa',
        'madansi/tests/data/four_blast_hits',
    )
    expected_genes = (
        ('Contig1', ['gene1', 'gene2', 'gene3']),
        ('Contig2', ['geneA']),
        ('Contig3', []),
    )

    # All three contigs are expected as keys, including Contig3 with no genes.
    self.assertCountEqual(
        list(detector.contigs_to_genes().keys()),
        ['Contig1', 'Contig2', 'Contig3'],
    )

    # contigs_to_genes() is invoked once per check.
    for contig, genes in expected_genes:
        self.assertCountEqual(
            list(detector.contigs_to_genes()[contig].gene_objects.keys()),
            genes,
        )
def test_contigs_to_genes_no_hits(self):
    """Check the contig keys and that Contig1 holds no gene objects.

    Uses the 'empty_file' blast hits fixture.
    """
    detector = GeneDetector(
        'madansi/tests/data/assembly.fa',
        'madansi/tests/data/empty_file',
    )

    contig_names = list(detector.contigs_to_genes().keys())
    self.assertCountEqual(contig_names, ['Contig1', 'Contig2', 'Contig3'])

    contig1_genes = detector.contigs_to_genes()['Contig1'].gene_objects
    self.assertEqual(contig1_genes, {})
def test_finding_contig_ends_gene_degree_one(self):
    """Check ends_of_contigs() for Contig3/Contig2 linked via gene7 and gene4."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene9', 'gene8'),
        ('gene8', 'gene7'),
        ('gene7', 'gene6'),
        ('gene7', 'gene4'),
        ('gene4', 'gene5'),
    ])
    neighbours = [[('Contig3', 'Contig2'), 1, ['gene7', 'gene4']]]
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    # NOTE(review): the third entry of each list is presumably the contig
    # length -- confirm against RefineContigNeighbours.
    contig_details = {'Contig2': ['', '', 800], 'Contig3': ['', '', 2000]}

    # The same graph object is supplied for both graph arguments.
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, contig_details)

    expected_ends = {
        'Contig3': {'Contig2': [250, 700]},
        'Contig2': {'Contig3': [2, 301]},
    }
    self.assertDictEqual(refiner.ends_of_contigs(), expected_ends)
def test_one_gene_on_contig(self):
    """Check ends_of_contigs() for Contig2/Contig4 linked via gene5 and gene10."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_10_blast_hits_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene4', 'gene5'),
        ('gene5', 'gene10'),
        ('gene10', 'geneA'),
    ])
    neighbours = [[('Contig2', 'Contig4'), 1, ['gene5', 'gene10']]]
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    contig_details = {'Contig2': ['', '', 800], 'Contig4': ['', '', 50]}

    # The same graph object is supplied for both graph arguments.
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, contig_details)

    expected_ends = {
        'Contig4': {'Contig2': [5, 50]},
        'Contig2': {'Contig4': [301, 2]},
    }
    self.assertDictEqual(refiner.ends_of_contigs(), expected_ends)
def test_most_occurent_contig_not_in_neighbours(self):
    """Check that refinement returns no neighbours for this Contig1/Contig2 layout."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene1', 'gene2'),
        ('gene2', 'gene4'),
        ('gene4', 'gene5'),
        ('gene2', 'gene6'),
        ('gene6', 'gene7'),
        ('gene7', 'gene8'),
        ('gene4', 'gene8'),
    ])
    neighbours = [[('Contig1', 'Contig2'), 1, ['gene2', 'gene4']]]
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, {})

    refined = sorted(refiner.refine_contig_neighbours())
    self.assertEqual(refined, [])
def test_finds_orientation_two_contigs(self):
    """Check contig_ends after ends_of_contigs() for two contigs joined via geneA/geneB."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene1', 'gene2'),
        ('gene2', 'gene3'),
        ('gene3', 'geneA'),
        ('geneA', 'geneB'),
        ('geneB', 'gene4'),
        ('gene4', 'gene5'),
    ])
    neighbours = [[('Contig1', 'Contig2'), 2, ['geneA', 'geneB']]]
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    contig_details = {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800]}
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, contig_details)

    # Overwrite the contig/gene lookups with a hand-written layout.
    layout = {
        'Contig1': ['gene1', 'gene2', 'gene3'],
        'Contig2': ['gene4', 'gene5'],
    }
    refiner.contigs = {
        contig: dict.fromkeys(members) for contig, members in layout.items()
    }
    refiner.genes = {
        gene: contig for contig, members in layout.items() for gene in members
    }

    expected_ends = {
        'Contig1': {'Contig2': [980, 490]},
        'Contig2': {'Contig1': [2, 301]},
    }
    refiner.ends_of_contigs()
    self.assertDictEqual(refiner.contig_ends, expected_ends)
def test_keep_three_cycle_two_equal_weights(self):
    """Check that a three-cycle of equally weighted connections survives refinement."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene1', 'gene2'),
        ('gene2', 'gene3'),
        ('gene3', 'gene4'),
        ('gene4', 'gene5'),
        ('gene6', 'gene7'),
        ('gene7', 'gene8'),
        ('gene8', 'gene1'),
    ])
    neighbours = [
        [('Contig1', 'Contig2'), 1, ['gene3', 'gene4']],
        [('Contig2', 'Contig3'), 1, ['gene5', 'gene6']],
        [('Contig1', 'Contig3'), 1, ['gene8', 'gene1']],
    ]
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, {})

    # Overwrite the contig/gene lookups with a hand-written layout.
    layout = {
        'Contig1': ['gene1', 'gene2', 'gene3'],
        'Contig2': ['gene4', 'gene5'],
        'Contig3': ['gene6', 'gene7', 'gene8'],
    }
    refiner.contigs = {
        contig: dict.fromkeys(members) for contig, members in layout.items()
    }
    refiner.genes = {
        gene: contig for contig, members in layout.items() for gene in members
    }

    # Run the refinement first, then sort the input list for comparison.
    refined = sorted(refiner.refine_contig_neighbours())
    self.assertEqual(refined, sorted(neighbours))
def test_contig_appearances(self):
    """Check find_contig_appearances() for one Contig1/Contig2 neighbour entry."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene1', 'gene2'),
        ('gene2', 'gene3'),
        ('gene3', 'gene4'),
        ('gene4', 'gene5'),
    ])
    neighbours = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene4']]]
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, {})

    expected = {
        'Contig1': [3, {0: ['gene3'], 1: ['gene2'], 2: ['gene1']}],
        'Contig2': [2, {0: ['gene4'], 1: ['gene5']}],
    }
    first_pair = neighbours[0]
    self.assertDictEqual(refiner.find_contig_appearances(first_pair), expected)
def test_keep_all_connections_empty_list(self):
    """Check refinement of an empty graph with an empty neighbouring-contigs list."""
    blast_hits = 'madansi/tests/data/empty_file'

    graph = nx.Graph()
    neighbours = []
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, {})
    refiner.genes = {}

    refiner.refine_contig_neighbours()

    refined = sorted(refiner.refined_neighbouring_contigs)
    self.assertEqual(refined, sorted(neighbours))
def test_no_difference(self):
    """Tests the case when all the contigs in the assembly file are also
    given in the filtered blast hits file.

    The file written by UnusedContigs is compared against the 'empty_file'
    fixture and is removed afterwards whether or not the test passes.
    """
    gene_detector = GeneDetector('madansi/tests/data/assembly.fa',
                                 'madansi/tests/data/test_blast_hits')
    output_file = 'output.fa'
    try:
        unused_contigs = UnusedContigs(gene_detector, output_file,
                                       'madansi/tests/data/assembly.fa')
        unused_contigs.contigs_not_in_filtered_file()
        unused_contigs.add_unused_contigs_to_end()
        # shallow=False forces a byte-for-byte comparison instead of
        # trusting matching os.stat() signatures.
        self.assertTrue(
            filecmp.cmp(output_file, 'madansi/tests/data/empty_file',
                        shallow=False))
    finally:
        # Clean up even when an assertion or an earlier step fails; guard
        # the unlink so a missing file cannot mask the original error.
        if os.path.exists(output_file):
            os.unlink(output_file)
def test_missing_sequence(self):
    """Tests when there is a difference between the contigs present in the
    assembly file and those where a gene is present.

    The file written by UnusedContigs is compared against the 'contig3'
    fixture and is removed afterwards whether or not the test passes.
    """
    gene_detector = GeneDetector('madansi/tests/data/assembly.fa',
                                 'madansi/tests/data/test_blast_hits_2')
    output_file = 'output'
    try:
        unused_contigs = UnusedContigs(gene_detector, output_file,
                                       'madansi/tests/data/assembly.fa')
        unused_contigs.contigs_not_in_filtered_file()
        unused_contigs.add_unused_contigs_to_end()
        # shallow=False forces a byte-for-byte comparison instead of
        # trusting matching os.stat() signatures.
        self.assertTrue(
            filecmp.cmp(output_file, 'madansi/tests/data/contig3',
                        shallow=False))
    finally:
        # Clean up even when an assertion or an earlier step fails; guard
        # the unlink so a missing file cannot mask the original error.
        if os.path.exists(output_file):
            os.unlink(output_file)
def test_set_expansion_one_contig_not_isolated(self):
    """Check that set_expansion() from Contig1's genes leaves finished_contigs empty."""
    detector = GeneDetector(
        'madansi/tests/data/assembly.fa',
        'madansi/tests/data/test_blast_hits',
    )
    dot_graph = nx.drawing.nx_pydot.read_dot('madansi/tests/data/test_graph.dot')
    graph = nx.Graph(dot_graph)
    searcher = ContigSearching(detector, graph)

    expansion = searcher.set_expansion(['gene1', 'gene2', 'gene3'], 'Contig1')
    self.assertEqual(expansion.finished_contigs, set())
def test_loop_of_genes_between_contigs(self):
    """Check that only the Contig2/Contig3 and Contig1/Contig3 entries are kept."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene1', 'gene2'),
        ('gene2', 'gene3'),
        ('gene3', 'geneA'),
        ('geneA', 'geneB'),
        ('geneB', 'gene4'),
        ('gene4', 'gene5'),
        ('gene3', 'gene6'),
        ('gene6', 'gene7'),
        ('gene7', 'gene8'),
        ('gene8', 'gene9'),
        ('gene9', 'geneB'),
    ])
    neighbours = [
        [('Contig1', 'Contig2'), 2, ['geneA', 'geneB']],
        [('Contig2', 'Contig3'), 1, ['geneB']],
        [('Contig1', 'Contig3'), 1, ['geneA']],
    ]
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, {})

    kept = [
        [('Contig2', 'Contig3'), 1, ['geneB']],
        [('Contig1', 'Contig3'), 1, ['geneA']],
    ]
    refined = sorted(refiner.refine_contig_neighbours())
    self.assertEqual(refined, sorted(kept))
def test_one_contig_dummy_genes(self):
    """Check that expand_all_contigs() finds no neighbouring contigs for this fixture."""
    detector = GeneDetector(
        'madansi/tests/data/assembly.fa',
        'madansi/tests/data/one_blast_hit',
    )
    dot_graph = nx.drawing.nx_pydot.read_dot(
        'madansi/tests/data/one_contig_dummy_genes.dot')
    searcher = ContigSearching(detector, nx.Graph(dot_graph))

    searcher.expand_all_contigs()

    self.assertEqual(searcher.neighbouring_contigs, [])
def test_initialisation(self):
    """Check the attributes of a freshly constructed ContigSearching object."""
    detector = GeneDetector(
        'madansi/tests/data/assembly.fa',
        'madansi/tests/data/test_blast_hits',
    )
    dot_graph = nx.drawing.nx_pydot.read_dot(
        'madansi/tests/data/graph_3_nodes.dot')
    searcher = ContigSearching(detector, nx.Graph(dot_graph))

    # No search has been run, so every collection is expected to be empty.
    self.assertDictEqual(searcher.genes_in_contig_radius, {})
    self.assertCountEqual(searcher.neighbouring_contigs, [])
    self.assertEqual(searcher.finished_contigs, set())
def test_find_contig_orientation(self):
    """Check find_contig_orientation() for Contig1 (-1) and Contig2 (1)."""
    detector = GeneDetector(
        'madansi/tests/data/assembly.fa',
        'madansi/tests/data/four_blast_hits',
    )
    orienter = ContigOrientation(nx.Graph(), detector)

    expected_orientations = (('Contig1', -1), ('Contig2', 1))
    for contig, orientation in expected_orientations:
        self.assertEqual(orienter.find_contig_orientation(contig), orientation)
def test_expand_all_contigs_three_contigs(self):
    """Check the neighbouring contigs found by expand_all_contigs() on test_graph.dot.

    Each expected entry is [contig pair, count, genes], with the pair and
    the gene list given as sorted lists. Entries are compared
    order-insensitively by sorting both sides.
    """
    gene_detector = GeneDetector('madansi/tests/data/assembly.fa',
                                 'madansi/tests/data/test_blast_hits')
    filtered_graph = nx.Graph(
        nx.drawing.nx_pydot.read_dot('madansi/tests/data/test_graph.dot'))
    contig_searching = ContigSearching(gene_detector, filtered_graph)
    contig_searching.expand_all_contigs()
    contig_neighbourhoods = contig_searching.neighbouring_contigs

    expected_neighbourhoods = [
        [['Contig1', 'Contig2'], 1, ['gene1', 'gene3', 'gene4', 'gene5']],
        [['Contig1', 'Contig3'], 1, ['gene2', 'gene6']],
    ]
    # assertEqual (rather than assertTrue on ==) reports both lists on failure.
    self.assertEqual(sorted(contig_neighbourhoods),
                     sorted(expected_neighbourhoods))
def test_compare_graphs(self):
    """Check the file written after contigs_not_in_filtered_graph().

    The graph holds Contig1-Contig4 in a chain plus an isolated Contig7.
    The file written by UnusedContigs is compared against the
    'test_unused_contigs_in_graph' fixture and is removed afterwards
    whether or not the test passes.
    """
    gene_detector = GeneDetector('madansi/tests/data/assembly_7_sequences.fa',
                                 'madansi/tests/data/test_blast_hits_2')
    output_file = 'output'
    try:
        unused_contigs = UnusedContigs(
            gene_detector, output_file,
            'madansi/tests/data/assembly_7_sequences.fa')
        filtered_graph = nx.Graph()
        filtered_graph.add_edges_from([('Contig1', 'Contig2'),
                                       ('Contig2', 'Contig3'),
                                       ('Contig3', 'Contig4')])
        filtered_graph.add_node('Contig7')
        unused_contigs.contigs_not_in_filtered_graph(filtered_graph)
        unused_contigs.add_unused_contigs_to_end()
        # shallow=False forces a byte-for-byte comparison instead of
        # trusting matching os.stat() signatures.
        self.assertTrue(
            filecmp.cmp(output_file,
                        'madansi/tests/data/test_unused_contigs_in_graph',
                        shallow=False))
    finally:
        # Clean up even when an assertion or an earlier step fails; guard
        # the unlink so a missing file cannot mask the original error.
        if os.path.exists(output_file):
            os.unlink(output_file)
def test_single_gene_from_a_contig(self):
    """Two contigs where one contributes a single gene to the connection.

    The other genes of that contig sit elsewhere in the graph; the
    refinement is expected to keep no neighbours.
    """
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_5_blast_hits_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene1', 'gene2'),
        ('gene2', 'gene3'),
        ('gene4', 'gene5'),
    ])
    neighbours = [[('Contig1', 'Contig2'), 1, ['gene2', 'gene3']]]
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, {})

    # Overwrite the gene -> contig lookup with a hand-written layout.
    layout = {
        'Contig1': ['gene1', 'gene2'],
        'Contig2': ['gene3', 'gene4', 'gene5'],
    }
    refiner.genes = {
        gene: contig for contig, members in layout.items() for gene in members
    }

    refiner.refine_contig_neighbours()

    self.assertEqual(sorted(refiner.refined_neighbouring_contigs), [])
def test_keep_one_connection(self):
    """Check that a single neighbouring-contigs connection survives refinement."""
    blast_hits = 'madansi/tests/data/empty_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene1', 'gene2'),
        ('gene2', 'gene5'),
        ('gene5', 'gene6'),
        ('gene6', 'gene3'),
        ('gene3', 'gene4'),
    ])
    neighbours = [[('Contig1', 'Contig2'), 2, ['gene5', 'gene6']]]
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, {})

    # Overwrite the gene -> contig lookup with a hand-written layout.
    layout = {
        'Contig1': ['gene1', 'gene2'],
        'Contig2': ['gene3', 'gene4'],
    }
    refiner.genes = {
        gene: contig for contig, members in layout.items() for gene in members
    }

    refiner.refine_contig_neighbours()

    refined = sorted(refiner.refined_neighbouring_contigs)
    self.assertEqual(refined, sorted(neighbours))
def test_expand_all_contigs_two_neighbouring_contigs(self):
    """Check the single neighbouring-contigs entry found on graph_4_nodes.dot.

    The expected entry is [contig pair, count, genes], with the pair and
    the gene list given as sorted lists.
    """
    gene_detector = GeneDetector('madansi/tests/data/assembly.fa',
                                 'madansi/tests/data/four_blast_hits')
    filtered_graph = nx.Graph(
        nx.drawing.nx_pydot.read_dot('madansi/tests/data/graph_4_nodes.dot'))
    contig_searching = ContigSearching(gene_detector, filtered_graph)
    contig_searching.expand_all_contigs()
    contig_neighbourhoods = contig_searching.neighbouring_contigs

    # 'gene1' sorts before 'geneA' because digits order before letters.
    expected_neighbourhoods = [[['Contig1', 'Contig2'], 1, ['gene1', 'geneA']]]
    # assertEqual (rather than assertTrue on ==) reports both lists on failure.
    self.assertEqual(contig_neighbourhoods, expected_neighbourhoods)
def test_expand_all_contigs_three_contigs_multiple_iterations(self):
    """Check the three neighbouring-contigs entries found on three_contigs_separated.dot.

    Each expected entry is [contig pair, count, genes], with the pair and
    the gene list given as sorted lists. Entries are compared
    order-insensitively by sorting both sides.
    """
    gene_detector = GeneDetector(
        'madansi/tests/data/assembly.fa',
        'madansi/tests/data/test_blast_hits_three_contigs')
    filtered_graph = nx.Graph(
        nx.drawing.nx_pydot.read_dot(
            'madansi/tests/data/three_contigs_separated.dot'))
    contig_searching = ContigSearching(gene_detector, filtered_graph)
    contig_searching.expand_all_contigs()
    contig_neighbourhoods = contig_searching.neighbouring_contigs

    expected_neighbourhoods = [
        [['Contig1', 'Contig2'], 2, ['geneA', 'geneD']],
        [['Contig2', 'Contig3'], 2, ['geneB', 'geneE']],
        [['Contig1', 'Contig3'], 2, ['geneC', 'geneF']],
    ]
    # assertEqual (rather than assertTrue on ==) reports both lists on failure.
    self.assertEqual(sorted(contig_neighbourhoods),
                     sorted(expected_neighbourhoods))
def test_contig_joins_in_middle(self):
    """One contig's end lies closest to the middle of a second contig.

    The refinement is expected to return no neighbours for that intersection.
    """
    blast_hits = 'madansi/tests/data/empty_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene1', 'gene2'),
        ('gene2', 'gene3'),
        ('gene3', 'gene4'),
        ('gene4', 'gene5'),
        ('gene5', 'gene6'),
        ('gene3', 'gene7'),
        ('gene7', 'gene8'),
    ])
    neighbours = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene7']]]
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, {})

    # Overwrite the gene -> contig lookup with a hand-written layout.
    layout = {
        'Contig1': ['gene1', 'gene2', 'gene3', 'gene4', 'gene5', 'gene6'],
        'Contig2': ['gene7', 'gene8'],
    }
    refiner.genes = {
        gene: contig for contig, members in layout.items() for gene in members
    }

    refined = sorted(refiner.refine_contig_neighbours())
    self.assertEqual(refined, [])
def test_three_contigs_together(self):
    """Three contigs all within a small distance of the intersection gene7.

    The refinement is expected to keep no neighbours.
    """
    blast_hits = 'madansi/tests/data/empty_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene1', 'gene2'),
        ('gene3', 'gene4'),
        ('gene5', 'gene6'),
        ('gene7', 'gene2'),
        ('gene7', 'gene3'),
        ('gene7', 'gene5'),
    ])
    neighbours = [
        [('Contig1', 'Contig2'), 1, ['gene7']],
        [('Contig2', 'Contig3'), 1, ['gene7']],
        [('Contig1', 'Contig3'), 1, ['gene7']],
    ]
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, {})

    # Overwrite the gene -> contig lookup with a hand-written layout.
    layout = {
        'Contig1': ['gene1', 'gene2'],
        'Contig2': ['gene3', 'gene4'],
        'Contig3': ['gene5', 'gene6'],
    }
    refiner.genes = {
        gene: contig for contig, members in layout.items() for gene in members
    }

    refiner.refine_contig_neighbours()

    self.assertEqual(sorted(refiner.refined_neighbouring_contigs), [])
def test_two_separated_sections(self):
    """Check the two neighbouring-contigs entries found on two_separated_sections.dot.

    Each expected entry is [contig pair, count, genes], with the pair and
    the gene list given as sorted lists. Entries are compared
    order-insensitively by sorting both sides.
    """
    gene_detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa',
        'madansi/tests/data/seven_blast_hits')
    filtered_graph = nx.Graph(
        nx.drawing.nx_pydot.read_dot(
            'madansi/tests/data/two_separated_sections.dot'))
    contig_searching = ContigSearching(gene_detector, filtered_graph)
    contig_searching.expand_all_contigs()
    contig_neighbourhoods = contig_searching.neighbouring_contigs

    expected_neighbourhoods = [
        [['Contig1', 'Contig2'], 1, ['gene2', 'gene3']],
        [['Contig3', 'Contig4'], 2, ['geneA', 'geneB']],
    ]
    # assertEqual (rather than assertTrue on ==) reports both lists on failure.
    self.assertEqual(sorted(contig_neighbourhoods),
                     sorted(expected_neighbourhoods))
def test_contigs_to_genes_one_hit(self):
    """Check the keys and the gene1 attributes produced from the one_blast_hit fixture."""
    detector = GeneDetector(
        'madansi/tests/data/assembly.fa',
        'madansi/tests/data/one_blast_hit',
    )

    self.assertCountEqual(
        list(detector.contigs_to_genes().keys()),
        ['Contig1', 'Contig2', 'Contig3'],
    )
    for empty_contig in ('Contig2', 'Contig3'):
        self.assertEqual(
            detector.contigs_to_genes()[empty_contig].gene_objects, {})
    self.assertCountEqual(
        list(detector.contigs_to_genes()['Contig1'].gene_objects.keys()),
        ['gene1'],
    )

    # Compare the detected gene with a reference Gene, attribute by attribute;
    # contigs_to_genes() is invoked once per attribute.
    reference = Gene(-1, 402, 1, None, 'Contig1', 3)
    compared_fields = (
        'orientation', 'start', 'end', 'node', 'contig', 'qry_start')
    for field in compared_fields:
        detected = detector.contigs_to_genes()['Contig1'].gene_objects['gene1']
        self.assertEqual(getattr(detected, field), getattr(reference, field))
def test_keep_two_connections(self):
    """Check that both neighbouring-contigs connections survive refinement."""
    blast_hits = 'madansi/tests/data/blast_hits_13'

    # A single chain gene1 - gene2 - ... - gene13.
    graph = nx.Graph()
    graph.add_edges_from(
        [('gene%d' % i, 'gene%d' % (i + 1)) for i in range(1, 13)])
    neighbours = [
        [('Contig1', 'Contig2'), 1, ['gene4']],
        [('Contig2', 'Contig3'), 2, ['gene8', 'gene9']],
    ]
    detector = GeneDetector(
        'madansi/tests/data/assembly_7_sequences.fa', blast_hits)
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, {})

    # Overwrite the gene -> contig lookup; gene4, gene8 and gene9 are left
    # unassigned.
    layout = {
        'Contig1': ['gene1', 'gene2', 'gene3'],
        'Contig2': ['gene5', 'gene6', 'gene7'],
        'Contig3': ['gene10', 'gene11', 'gene13', 'gene12'],
    }
    refiner.genes = {
        gene: contig for contig, members in layout.items() for gene in members
    }

    refiner.refine_contig_neighbours()

    refined = sorted(refiner.refined_neighbouring_contigs)
    self.assertEqual(refined, sorted(neighbours))
def test_finding_contig_ends_multiple_genes_same_side_of_closer_gene(self):
    """Check contig_ends after ends_of_contigs() for Contig1/Contig2.

    Two results are accepted; they differ only in the second coordinate
    recorded for Contig1 (3 or 490).
    """
    filtered_graph = nx.Graph()
    filtered_graph.add_edges_from([('gene1', 'gene2'), ('gene3', 'gene2'),
                                   ('gene3', 'gene4'), ('gene4', 'gene5')])
    neighbouring_contigs = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene4']]]
    gene_detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa',
        'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file')
    refine_contig_neighbours_object = RefineContigNeighbours(
        neighbouring_contigs, filtered_graph, filtered_graph,
        'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file',
        gene_detector,
        {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800]})
    refine_contig_neighbours_object.ends_of_contigs()

    acceptable_contig_ends = [
        {'Contig1': {'Contig2': [980, 3]}, 'Contig2': {'Contig1': [2, 301]}},
        {'Contig1': {'Contig2': [980, 490]}, 'Contig2': {'Contig1': [2, 301]}},
    ]
    # assertIn reports the actual contig_ends on failure, unlike assertTrue
    # over an `==`/`or` expression.
    self.assertIn(refine_contig_neighbours_object.contig_ends,
                  acceptable_contig_ends)
def test_add_to_contig_appearances(self):
    """Check add_to_contig_appearance() for 'gene1' against four starting dictionaries."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

    graph = nx.Graph()
    detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
    neighbours = []
    contig_details = {
        'Contig1': ['', '', 1000],
        'Contig2': ['', '', 800],
        'Contig3': ['', '', 2000],
    }
    refiner = RefineContigNeighbours(
        neighbours, graph, graph, blast_hits, detector, contig_details)

    # (starting appearances, third argument, expected result)
    cases = [
        ({}, 0,
         {'Contig1': [1, {0: ['gene1']}]}),
        ({'Contig1': [0, {}]}, 1,
         {'Contig1': [1, {1: ['gene1']}]}),
        ({'Contig1': [1, {0: ['gene2']}]}, 1,
         {'Contig1': [2, {0: ['gene2'], 1: ['gene1']}]}),
        ({'Contig1': [2, {1: ['gene2'], 2: ['gene3']}]}, 2,
         {'Contig1': [3, {1: ['gene2'], 2: ['gene1', 'gene3']}]}),
    ]
    for appearances, bucket, expected in cases:
        self.assertDictEqual(
            refiner.add_to_contig_appearance('gene1', appearances, bucket),
            expected,
        )
def test_contigs_to_genes_four_hits(self):
    """Check the genes listed per contig when using the four_blast_hits fixture."""
    detector = GeneDetector(
        'madansi/tests/data/assembly.fa',
        'madansi/tests/data/four_blast_hits',
    )

    contig_names = list(detector.contigs_to_genes().keys())
    self.assertCountEqual(contig_names, ['Contig1', 'Contig2', 'Contig3'])

    # contigs_to_genes() is invoked once per contig checked.
    genes_per_contig = (
        ('Contig1', ['gene1', 'gene2', 'gene3']),
        ('Contig2', ['geneA']),
        ('Contig3', []),
    )
    for contig, expected_genes in genes_per_contig:
        found_genes = list(
            detector.contigs_to_genes()[contig].gene_objects.keys())
        self.assertCountEqual(found_genes, expected_genes)
def test_two_contigs_unconnected(self):
    """Check that the ordered contig graph is isomorphic to an empty graph."""
    blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'

    graph = nx.Graph()
    graph.add_edges_from([
        ('gene1', 'gene2'),
        ('gene2', 'gene3'),
        ('gene4', 'gene5'),
    ])
    detector = GeneDetector('madansi/tests/data/assembly.fa', blast_hits)
    empty_graph = nx.Graph()
    contig_details = {
        'Contig1': ['', '', 1000],
        'Contig2': ['', '', 800],
        'Contig3': ['', '', 2000],
    }

    # The same graph object is supplied for both graph arguments.
    producer = ProduceOrderedContigGraph(
        detector, graph, graph, blast_hits, contig_details)

    ordered_graph = producer.produce_ordered_contig_graph()
    self.assertTrue(nx.is_isomorphic(empty_graph, ordered_graph))
def test_orientate_edges_one_edge(self):
    """Check the edge weight set by repeat_all_connected_components().

    Four single-edge contig graphs are built, each with initial weight
    0.5. After orientation each edge is expected to carry the weight
    listed in `cases`.
    """
    gene_detector = GeneDetector(
        'madansi/tests/data/assembly_4_sequences.fa',
        'madansi/tests/data/seven_blast_hits')

    # (first contig, second contig, expected weight after orientation)
    cases = [
        ('Contig2', 'Contig4', 1),
        ('Contig2', 'Contig3', 2),
        ('Contig1', 'Contig2', 3),
        ('Contig1', 'Contig3', 4),
    ]

    # Build every graph first, then orient them all, then assert.
    contig_graphs = []
    for first, second, _ in cases:
        contig_graph = nx.Graph()
        contig_graph.add_edge(first, second, weight=0.5)
        contig_graphs.append(contig_graph)

    orientation_objects = []
    for contig_graph in contig_graphs:
        contig_orientation_object = ContigOrientation(
            contig_graph, gene_detector)
        contig_orientation_object.repeat_all_connected_components()
        orientation_objects.append(contig_orientation_object)

    for (first, second, expected_weight), contig_orientation_object in zip(
            cases, orientation_objects):
        # G[u][v] is used instead of G.edge[u][v]: the `edge` attribute was
        # removed in NetworkX 2.0, while subscript access works in 1.x and 2.x.
        self.assertEqual(
            contig_orientation_object.contig_graph[first][second]['weight'],
            expected_weight)
def test_contigs_to_genes_no_hits(self):
    """Check that all three contigs are keys and Contig1 has no gene objects.

    Uses the 'empty_file' blast hits fixture.
    """
    assembly_path = 'madansi/tests/data/assembly.fa'
    blast_hits_path = 'madansi/tests/data/empty_file'
    detector = GeneDetector(assembly_path, blast_hits_path)

    self.assertCountEqual(
        list(detector.contigs_to_genes().keys()),
        ['Contig1', 'Contig2', 'Contig3'],
    )
    self.assertEqual(
        detector.contigs_to_genes()['Contig1'].gene_objects,
        {},
    )
def test_parse_blast_fits_empty_file(self):
    """Check that parse_blast_hits() returns an empty list for the 'empty_file' fixture."""
    detector = GeneDetector(
        'madansi/tests/data/empty_file.fa',
        'madansi/tests/data/empty_file',
    )
    parsed_hits = detector.parse_blast_hits()
    self.assertEqual(parsed_hits, [])
def test_parse_blast_hits_one(self):
    """Tests that the correct type of object is obtained from parsing one blast hit.

    The first element returned by parse_blast_hits() must be a BlastHit.
    """
    gene_detector = GeneDetector('madansi/tests/data/assembly.fa',
                                 'madansi/tests/data/one_blast_hit')
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(gene_detector.parse_blast_hits()[0], BlastHit)