예제 #1
0
 def test_contigs_to_genes_four_hits(self):
     """Check the contig-to-gene mapping built from the four_blast_hits fixture.

     All three contigs must be keys of the mapping; Contig1 is expected to
     hold gene1-gene3, Contig2 only geneA and Contig3 no genes at all.
     """
     detector = GeneDetector(
         'madansi/tests/data/assembly.fa',
         'madansi/tests/data/four_blast_hits',
     )
     contig_names = list(detector.contigs_to_genes().keys())
     self.assertCountEqual(contig_names, ['Contig1', 'Contig2', 'Contig3'])

     # contigs_to_genes() is called afresh for every contig that is checked.
     expected_genes = (
         ('Contig1', ['gene1', 'gene2', 'gene3']),
         ('Contig2', ['geneA']),
         ('Contig3', []),
     )
     for contig, gene_names in expected_genes:
         found = list(detector.contigs_to_genes()[contig].gene_objects.keys())
         self.assertCountEqual(found, gene_names)
예제 #2
0
 def test_contigs_to_genes_no_hits(self):
     """Check the contig mapping produced from an empty blast hits file.

     All three contig names must still be keys of the mapping, and Contig1
     must have an empty ``gene_objects`` dictionary.
     """
     assembly_path = 'madansi/tests/data/assembly.fa'
     blast_hits_path = 'madansi/tests/data/empty_file'
     detector = GeneDetector(assembly_path, blast_hits_path)

     contig_names = list(detector.contigs_to_genes().keys())
     self.assertCountEqual(contig_names, ['Contig1', 'Contig2', 'Contig3'])

     contig1_genes = detector.contigs_to_genes()['Contig1'].gene_objects
     self.assertEqual(contig1_genes, {})
 def test_finding_contig_ends_gene_degree_one(self):
     """Check ends_of_contigs() for the neighbouring pair (Contig3, Contig2).

     The pair is linked through gene7 and gene4 in the graph built here.
     """
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     edges = [
         ('gene9', 'gene8'),
         ('gene8', 'gene7'),
         ('gene7', 'gene6'),
         ('gene7', 'gene4'),
         ('gene4', 'gene5'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [[('Contig3', 'Contig2'), 1, ['gene7', 'gene4']]]
     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     contig_info = {'Contig2': ['', '', 800], 'Contig3': ['', '', 2000]}
     # The same graph object is supplied for both graph arguments.
     refiner = RefineContigNeighbours(
         neighbours, graph, graph, blast_hits, detector, contig_info)

     expected_ends = {
         'Contig3': {'Contig2': [250, 700]},
         'Contig2': {'Contig3': [2, 301]},
     }
     self.assertDictEqual(refiner.ends_of_contigs(), expected_ends)
 def test_one_gene_on_contig(self):
     """Check ends_of_contigs() for the neighbouring pair (Contig2, Contig4).

     The pair is linked through gene5 and gene10 in the graph built here.
     """
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_10_blast_hits_file'
     edges = [
         ('gene4', 'gene5'),
         ('gene5', 'gene10'),
         ('gene10', 'geneA'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [[('Contig2', 'Contig4'), 1, ['gene5', 'gene10']]]
     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     contig_info = {'Contig2': ['', '', 800], 'Contig4': ['', '', 50]}
     # The same graph object is supplied for both graph arguments.
     refiner = RefineContigNeighbours(
         neighbours, graph, graph, blast_hits, detector, contig_info)

     expected_ends = {
         'Contig4': {'Contig2': [5, 50]},
         'Contig2': {'Contig4': [301, 2]},
     }
     self.assertDictEqual(refiner.ends_of_contigs(), expected_ends)
 def test_most_occurent_contig_not_in_neighbours(self):
     """Check that refinement returns no neighbours for the (Contig1, Contig2) pair in this graph."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     edges = [
         ('gene1', 'gene2'),
         ('gene2', 'gene4'),
         ('gene4', 'gene5'),
         ('gene2', 'gene6'),
         ('gene6', 'gene7'),
         ('gene7', 'gene8'),
         ('gene4', 'gene8'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [[('Contig1', 'Contig2'), 1, ['gene2', 'gene4']]]
     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(
         neighbours, graph, graph, blast_hits, detector, {})

     refined = sorted(refiner.refine_contig_neighbours())
     self.assertEqual(refined, [])
 def test_finds_orientation_two_contigs(self):
     """Check the contig_ends recorded by ends_of_contigs() for Contig1 and Contig2.

     The object's ``contigs`` and ``genes`` attributes are overwritten by hand
     before ends_of_contigs() is called.
     """
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     edges = [
         ('gene1', 'gene2'),
         ('gene2', 'gene3'),
         ('gene3', 'geneA'),
         ('geneA', 'geneB'),
         ('geneB', 'gene4'),
         ('gene4', 'gene5'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [[('Contig1', 'Contig2'), 2, ['geneA', 'geneB']]]
     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     contig_info = {'Contig1': ['', '', 1000], 'Contig2': ['', '', 800]}
     refiner = RefineContigNeighbours(
         neighbours, graph, graph, blast_hits, detector, contig_info)

     # Hand-written contig -> genes table; the reverse gene -> contig table
     # is derived from it so the two cannot drift apart.
     contig_table = {
         'Contig1': dict.fromkeys(['gene1', 'gene2', 'gene3']),
         'Contig2': dict.fromkeys(['gene4', 'gene5']),
     }
     refiner.contigs = contig_table
     refiner.genes = {
         gene: contig
         for contig, gene_names in contig_table.items()
         for gene in gene_names
     }

     refiner.ends_of_contigs()
     self.assertDictEqual(
         refiner.contig_ends,
         {
             'Contig1': {'Contig2': [980, 490]},
             'Contig2': {'Contig1': [2, 301]},
         })
 def test_keep_three_cycle_two_equal_weights(self):
     """Check that a three-contig cycle whose connections all have weight 1 survives refinement unchanged."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     edges = [
         ('gene1', 'gene2'),
         ('gene2', 'gene3'),
         ('gene3', 'gene4'),
         ('gene4', 'gene5'),
         ('gene6', 'gene7'),
         ('gene7', 'gene8'),
         ('gene8', 'gene1'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)

     neighbouring_contigs = [
         [('Contig1', 'Contig2'), 1, ['gene3', 'gene4']],
         [('Contig2', 'Contig3'), 1, ['gene5', 'gene6']],
         [('Contig1', 'Contig3'), 1, ['gene8', 'gene1']],
     ]

     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(
         neighbouring_contigs, graph, graph, blast_hits, detector, {})

     # Hand-written contig -> genes table; the reverse gene -> contig table
     # is derived from it.
     contig_table = {
         'Contig1': dict.fromkeys(['gene1', 'gene2', 'gene3']),
         'Contig2': dict.fromkeys(['gene4', 'gene5']),
         'Contig3': dict.fromkeys(['gene6', 'gene7', 'gene8']),
     }
     refiner.contigs = contig_table
     refiner.genes = {
         gene: contig
         for contig, gene_names in contig_table.items()
         for gene in gene_names
     }

     refined = sorted(refiner.refine_contig_neighbours())
     self.assertEqual(refined, sorted(neighbouring_contigs))
 def test_contig_appearances(self):
     """Check find_contig_appearances() for the single (Contig1, Contig2) neighbour entry."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     edges = [
         ('gene1', 'gene2'),
         ('gene2', 'gene3'),
         ('gene3', 'gene4'),
         ('gene4', 'gene5'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene4']]]
     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(
         neighbours, graph, graph, blast_hits, detector, {})

     appearances = refiner.find_contig_appearances(neighbours[0])
     expected = {
         'Contig1': [3, {0: ['gene3'], 1: ['gene2'], 2: ['gene1']}],
         'Contig2': [2, {0: ['gene4'], 1: ['gene5']}],
     }
     self.assertDictEqual(appearances, expected)
 def test_keep_all_connections_empty_list(self):
     """Check refinement of an empty neighbour list on an empty graph leaves it empty."""
     empty_file = 'madansi/tests/data/empty_file'
     graph = nx.Graph()
     neighbouring_contigs = []
     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', empty_file)
     refiner = RefineContigNeighbours(
         neighbouring_contigs, graph, graph, empty_file, detector, {})
     refiner.genes = {}

     refiner.refine_contig_neighbours()

     refined = sorted(refiner.refined_neighbouring_contigs)
     self.assertEqual(refined, sorted(neighbouring_contigs))
예제 #10
0
 def test_no_difference(self):
     """Tests the case when all the contigs in the assembly file are also given in the filtered blast hits file.

     The output file is compared with the empty fixture file and is removed
     afterwards, whether or not the test passes.
     """
     gene_detector = GeneDetector('madansi/tests/data/assembly.fa', 'madansi/tests/data/test_blast_hits')
     output_file = 'output.fa'
     try:
         unused_contigs = UnusedContigs(gene_detector, output_file, 'madansi/tests/data/assembly.fa')
         unused_contigs.contigs_not_in_filtered_file()
         unused_contigs.add_unused_contigs_to_end()
         self.assertTrue(filecmp.cmp(output_file, 'madansi/tests/data/empty_file'))
     finally:
         # Always clean up so a failing assertion or an error above does not
         # leave a stale output file behind for later tests.
         if os.path.exists(output_file):
             os.unlink(output_file)
예제 #11
0
 def test_missing_sequence(self):
     """Tests when there is a difference between the contigs present in the assembly file and those where a gene is present.

     The output file is compared with the ``contig3`` fixture file and is
     removed afterwards, whether or not the test passes.
     """
     gene_detector = GeneDetector('madansi/tests/data/assembly.fa', 'madansi/tests/data/test_blast_hits_2')
     output_file = 'output'
     try:
         unused_contigs = UnusedContigs(gene_detector, output_file, 'madansi/tests/data/assembly.fa')
         unused_contigs.contigs_not_in_filtered_file()
         unused_contigs.add_unused_contigs_to_end()
         self.assertTrue(filecmp.cmp(output_file, 'madansi/tests/data/contig3'))
     finally:
         # Always clean up so a failing assertion or an error above does not
         # leave a stale output file behind for later tests.
         if os.path.exists(output_file):
             os.unlink(output_file)
예제 #12
0
 def test_set_expansion_one_contig_not_isolated(self):
     """Check that expanding Contig1's three genes leaves finished_contigs empty."""
     detector = GeneDetector(
         'madansi/tests/data/assembly.fa',
         'madansi/tests/data/test_blast_hits',
     )
     dot_graph = nx.drawing.nx_pydot.read_dot(
         'madansi/tests/data/test_graph.dot')
     graph = nx.Graph(dot_graph)
     searcher = ContigSearching(detector, graph)

     expanded = searcher.set_expansion(['gene1', 'gene2', 'gene3'], 'Contig1')
     self.assertEqual(expanded.finished_contigs, set())
 def test_loop_of_genes_between_contigs(self):
     """Check that refinement drops the (Contig1, Contig2) entry and keeps the two entries involving Contig3."""
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     edges = [
         ('gene1', 'gene2'),
         ('gene2', 'gene3'),
         ('gene3', 'geneA'),
         ('geneA', 'geneB'),
         ('geneB', 'gene4'),
         ('gene4', 'gene5'),
         ('gene3', 'gene6'),
         ('gene6', 'gene7'),
         ('gene7', 'gene8'),
         ('gene8', 'gene9'),
         ('gene9', 'geneB'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [
         [('Contig1', 'Contig2'), 2, ['geneA', 'geneB']],
         [('Contig2', 'Contig3'), 1, ['geneB']],
         [('Contig1', 'Contig3'), 1, ['geneA']],
     ]
     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     refiner = RefineContigNeighbours(
         neighbours, graph, graph, blast_hits, detector, {})

     surviving = [
         [('Contig2', 'Contig3'), 1, ['geneB']],
         [('Contig1', 'Contig3'), 1, ['geneA']],
     ]
     refined = sorted(refiner.refine_contig_neighbours())
     self.assertEqual(refined, sorted(surviving))
예제 #14
0
 def test_one_contig_dummy_genes(self):
     """Check that expanding all contigs on the one_contig_dummy_genes graph finds no neighbouring contigs."""
     detector = GeneDetector(
         'madansi/tests/data/assembly.fa',
         'madansi/tests/data/one_blast_hit',
     )
     dot_graph = nx.drawing.nx_pydot.read_dot(
         'madansi/tests/data/one_contig_dummy_genes.dot')
     searcher = ContigSearching(detector, nx.Graph(dot_graph))

     searcher.expand_all_contigs()

     self.assertEqual(searcher.neighbouring_contigs, [])
예제 #15
0
 def test_initialisation(self):
     """Check that a freshly built ContigSearching starts with empty bookkeeping attributes."""
     detector = GeneDetector(
         'madansi/tests/data/assembly.fa',
         'madansi/tests/data/test_blast_hits',
     )
     dot_graph = nx.drawing.nx_pydot.read_dot(
         'madansi/tests/data/graph_3_nodes.dot')
     searcher = ContigSearching(detector, nx.Graph(dot_graph))

     # Nothing has been expanded yet, so every collection must be empty.
     self.assertDictEqual(searcher.genes_in_contig_radius, {})
     self.assertCountEqual(searcher.neighbouring_contigs, [])
     self.assertEqual(searcher.finished_contigs, set())
예제 #16
0
    def test_find_contig_orientation(self):
        """Check find_contig_orientation() gives -1 for Contig1 and 1 for Contig2.

        Uses the four_blast_hits fixture and an empty contig graph.
        """
        detector = GeneDetector(
            'madansi/tests/data/assembly.fa',
            'madansi/tests/data/four_blast_hits',
        )
        orientation = ContigOrientation(nx.Graph(), detector)

        expected_orientations = (('Contig1', -1), ('Contig2', 1))
        for contig, expected in expected_orientations:
            self.assertEqual(
                orientation.find_contig_orientation(contig), expected)
예제 #17
0
 def test_expand_all_contigs_three_contigs(self):
     """Check the neighbouring contigs found by expand_all_contigs() on test_graph.dot.

     Two entries are expected: (Contig1, Contig2) and (Contig1, Contig3),
     both with the value 1 in the second position.
     """
     gene_detector = GeneDetector('madansi/tests/data/assembly.fa',
                                  'madansi/tests/data/test_blast_hits')
     filtered_graph = nx.Graph(
         nx.drawing.nx_pydot.read_dot('madansi/tests/data/test_graph.dot'))
     contig_searching = ContigSearching(gene_detector, filtered_graph)
     contig_searching.expand_all_contigs()
     contig_neighbourhoods = contig_searching.neighbouring_contigs
     expected_neighbourhoods = [
         [sorted(('Contig1', 'Contig2')), 1, sorted(['gene1', 'gene4', 'gene3', 'gene5'])],
         [sorted(('Contig1', 'Contig3')), 1, sorted(['gene2', 'gene6'])],
     ]
     # assertEqual (rather than assertTrue on ==) reports which entries
     # differ when the comparison fails.
     self.assertEqual(sorted(contig_neighbourhoods), sorted(expected_neighbourhoods))
예제 #18
0
 def test_compare_graphs(self):
     """Check the output written after contigs_not_in_filtered_graph() for a graph holding Contig1-4 and Contig7.

     The output file is compared with the ``test_unused_contigs_in_graph``
     fixture and is removed afterwards, whether or not the test passes.
     """
     gene_detector = GeneDetector('madansi/tests/data/assembly_7_sequences.fa', 'madansi/tests/data/test_blast_hits_2')
     output_file = 'output'
     try:
         unused_contigs = UnusedContigs(gene_detector, output_file, 'madansi/tests/data/assembly_7_sequences.fa')
         filtered_graph = nx.Graph()
         filtered_graph.add_edges_from([('Contig1', 'Contig2'), ('Contig2', 'Contig3'), ('Contig3', 'Contig4')])
         filtered_graph.add_node('Contig7')
         unused_contigs.contigs_not_in_filtered_graph(filtered_graph)
         unused_contigs.add_unused_contigs_to_end()
         self.assertTrue(filecmp.cmp(output_file, 'madansi/tests/data/test_unused_contigs_in_graph'))
     finally:
         # Always clean up so a failing assertion or an error above does not
         # leave a stale output file behind for later tests.
         if os.path.exists(output_file):
             os.unlink(output_file)
 def test_single_gene_from_a_contig(self):
     """Check that the (Contig1, Contig2) entry is removed by refinement.

     gene3 is assigned to Contig2 but sits next to Contig1's genes in the
     graph, while Contig2's other genes (gene4, gene5) form a separate
     component.
     """
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_5_blast_hits_file'
     edges = [
         ('gene1', 'gene2'),
         ('gene2', 'gene3'),
         ('gene4', 'gene5'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbours = [[('Contig1', 'Contig2'), 1, ['gene2', 'gene3']]]
     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', blast_hits)

     refiner = RefineContigNeighbours(
         neighbours, graph, graph, blast_hits, detector, {})
     refiner.genes = {
         'gene1': 'Contig1',
         'gene2': 'Contig1',
         'gene3': 'Contig2',
         'gene4': 'Contig2',
         'gene5': 'Contig2',
     }

     refiner.refine_contig_neighbours()

     self.assertEqual(sorted(refiner.refined_neighbouring_contigs), [])
 def test_keep_one_connection(self):
     """Check that a single (Contig1, Contig2) connection is still present after refinement."""
     empty_file = 'madansi/tests/data/empty_file'
     edges = [
         ('gene1', 'gene2'),
         ('gene2', 'gene5'),
         ('gene5', 'gene6'),
         ('gene6', 'gene3'),
         ('gene3', 'gene4'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)
     neighbouring_contigs = [[('Contig1', 'Contig2'), 2, ['gene5', 'gene6']]]

     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', empty_file)

     refiner = RefineContigNeighbours(
         neighbouring_contigs, graph, graph, empty_file, detector, {})
     refiner.genes = {
         'gene1': 'Contig1',
         'gene2': 'Contig1',
         'gene3': 'Contig2',
         'gene4': 'Contig2',
     }

     refiner.refine_contig_neighbours()

     refined = sorted(refiner.refined_neighbouring_contigs)
     self.assertEqual(refined, sorted(neighbouring_contigs))
예제 #21
0
 def test_expand_all_contigs_two_neighbouring_contigs(self):
     """Check that expand_all_contigs() on graph_4_nodes.dot yields the single (Contig1, Contig2) entry."""
     gene_detector = GeneDetector('madansi/tests/data/assembly.fa',
                                  'madansi/tests/data/four_blast_hits')
     filtered_graph = nx.Graph(
         nx.drawing.nx_pydot.read_dot(
             'madansi/tests/data/graph_4_nodes.dot'))
     contig_searching = ContigSearching(gene_detector, filtered_graph)
     contig_searching.expand_all_contigs()
     contig_neighbourhoods = contig_searching.neighbouring_contigs
     expected_neighbourhoods = [
         [sorted(('Contig1', 'Contig2')), 1, sorted(['geneA', 'gene1'])],
     ]
     # assertEqual (rather than assertTrue on ==) shows the actual value
     # when the comparison fails.
     self.assertEqual(contig_neighbourhoods, expected_neighbourhoods)
예제 #22
0
 def test_expand_all_contigs_three_contigs_multiple_iterations(self):
     """Check the neighbouring contigs found on three_contigs_separated.dot.

     Each of the three contig pairs is expected once, with the value 2 in
     the second position.
     """
     gene_detector = GeneDetector(
         'madansi/tests/data/assembly.fa',
         'madansi/tests/data/test_blast_hits_three_contigs')
     filtered_graph = nx.Graph(
         nx.drawing.nx_pydot.read_dot(
             'madansi/tests/data/three_contigs_separated.dot'))
     contig_searching = ContigSearching(gene_detector, filtered_graph)
     contig_searching.expand_all_contigs()
     contig_neighbourhoods = contig_searching.neighbouring_contigs
     expected_neighbourhoods = [
         [sorted(('Contig1', 'Contig2')), 2, sorted(['geneA', 'geneD'])],
         [sorted(('Contig2', 'Contig3')), 2, sorted(['geneB', 'geneE'])],
         [sorted(('Contig3', 'Contig1')), 2, sorted(['geneC', 'geneF'])],
     ]
     # assertEqual (rather than assertTrue on ==) reports which entries
     # differ when the comparison fails.
     self.assertEqual(sorted(contig_neighbourhoods), sorted(expected_neighbourhoods))
 def test_contig_joins_in_middle(self):
     """Check that refinement returns nothing when Contig2 attaches at gene3, an interior gene of Contig1's chain."""
     empty_file = 'madansi/tests/data/empty_file'
     edges = [
         ('gene1', 'gene2'),
         ('gene2', 'gene3'),
         ('gene3', 'gene4'),
         ('gene4', 'gene5'),
         ('gene5', 'gene6'),
         ('gene3', 'gene7'),
         ('gene7', 'gene8'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)

     neighbours = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene7']]]
     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', empty_file)

     refiner = RefineContigNeighbours(
         neighbours, graph, graph, empty_file, detector, {})
     refiner.genes = {
         'gene1': 'Contig1',
         'gene2': 'Contig1',
         'gene3': 'Contig1',
         'gene4': 'Contig1',
         'gene5': 'Contig1',
         'gene6': 'Contig1',
         'gene7': 'Contig2',
         'gene8': 'Contig2',
     }

     refined = sorted(refiner.refine_contig_neighbours())
     self.assertEqual(refined, [])
 def test_three_contigs_together(self):
     """Check that no connection survives refinement when three contigs all meet at gene7."""
     empty_file = 'madansi/tests/data/empty_file'
     edges = [
         ('gene1', 'gene2'),
         ('gene3', 'gene4'),
         ('gene5', 'gene6'),
         ('gene7', 'gene2'),
         ('gene7', 'gene3'),
         ('gene7', 'gene5'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)

     neighbours = [
         [('Contig1', 'Contig2'), 1, ['gene7']],
         [('Contig2', 'Contig3'), 1, ['gene7']],
         [('Contig1', 'Contig3'), 1, ['gene7']],
     ]

     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', empty_file)

     refiner = RefineContigNeighbours(
         neighbours, graph, graph, empty_file, detector, {})
     refiner.genes = {
         'gene1': 'Contig1',
         'gene2': 'Contig1',
         'gene3': 'Contig2',
         'gene4': 'Contig2',
         'gene5': 'Contig3',
         'gene6': 'Contig3',
     }

     refiner.refine_contig_neighbours()

     self.assertEqual(sorted(refiner.refined_neighbouring_contigs), [])
예제 #25
0
 def test_two_separated_sections(self):
     """Check the neighbouring contigs found on two_separated_sections.dot.

     Two entries are expected: (Contig1, Contig2) with the value 1 and
     (Contig3, Contig4) with the value 2 in the second position.
     """
     gene_detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa',
         'madansi/tests/data/seven_blast_hits')
     filtered_graph = nx.Graph(
         nx.drawing.nx_pydot.read_dot(
             'madansi/tests/data/two_separated_sections.dot'))
     contig_searching = ContigSearching(gene_detector, filtered_graph)
     contig_searching.expand_all_contigs()
     contig_neighbourhoods = contig_searching.neighbouring_contigs
     expected_neighbourhoods = [
         [sorted(('Contig1', 'Contig2')), 1, sorted(['gene2', 'gene3'])],
         [sorted(('Contig3', 'Contig4')), 2, sorted(['geneA', 'geneB'])],
     ]
     # assertEqual (rather than assertTrue on ==) reports which entries
     # differ when the comparison fails.
     self.assertEqual(sorted(contig_neighbourhoods), sorted(expected_neighbourhoods))
예제 #26
0
 def test_contigs_to_genes_one_hit(self):
     """Check the contig mapping and the parsed gene for the one_blast_hit fixture.

     Contig2 and Contig3 must be empty, Contig1 must hold only gene1, and
     gene1's attributes must match a reference Gene built by hand.
     """
     detector = GeneDetector(
         'madansi/tests/data/assembly.fa',
         'madansi/tests/data/one_blast_hit',
     )
     contig_names = list(detector.contigs_to_genes().keys())
     self.assertCountEqual(contig_names, ['Contig1', 'Contig2', 'Contig3'])

     for empty_contig in ('Contig2', 'Contig3'):
         self.assertEqual(
             detector.contigs_to_genes()[empty_contig].gene_objects, {})
     contig1_gene_names = list(
         detector.contigs_to_genes()['Contig1'].gene_objects.keys())
     self.assertCountEqual(contig1_gene_names, ['gene1'])

     reference_gene = Gene(-1, 402, 1, None, 'Contig1', 3)
     compared_attributes = (
         'orientation', 'start', 'end', 'node', 'contig', 'qry_start')
     for attribute in compared_attributes:
         # The mapping is rebuilt for every attribute that is compared.
         parsed_gene = detector.contigs_to_genes()['Contig1'].gene_objects['gene1']
         self.assertEqual(
             getattr(parsed_gene, attribute),
             getattr(reference_gene, attribute))
 def test_keep_two_connections(self):
     """Check that both connections along a thirteen-gene chain are still present after refinement."""
     blast_hits = 'madansi/tests/data/blast_hits_13'
     edges = [
         ('gene1', 'gene2'),
         ('gene2', 'gene3'),
         ('gene3', 'gene4'),
         ('gene4', 'gene5'),
         ('gene5', 'gene6'),
         ('gene6', 'gene7'),
         ('gene7', 'gene8'),
         ('gene8', 'gene9'),
         ('gene9', 'gene10'),
         ('gene10', 'gene11'),
         ('gene11', 'gene12'),
         ('gene12', 'gene13'),
     ]
     graph = nx.Graph()
     graph.add_edges_from(edges)

     neighbouring_contigs = [
         [('Contig1', 'Contig2'), 1, ['gene4']],
         [('Contig2', 'Contig3'), 2, ['gene8', 'gene9']],
     ]
     detector = GeneDetector(
         'madansi/tests/data/assembly_7_sequences.fa', blast_hits)

     refiner = RefineContigNeighbours(
         neighbouring_contigs, graph, graph, blast_hits, detector, {})
     # gene4, gene8 and gene9 are intentionally absent from this table.
     refiner.genes = {
         'gene1': 'Contig1',
         'gene2': 'Contig1',
         'gene3': 'Contig1',
         'gene5': 'Contig2',
         'gene6': 'Contig2',
         'gene7': 'Contig2',
         'gene10': 'Contig3',
         'gene11': 'Contig3',
         'gene13': 'Contig3',
         'gene12': 'Contig3',
     }

     refiner.refine_contig_neighbours()

     refined = sorted(refiner.refined_neighbouring_contigs)
     self.assertEqual(refined, sorted(neighbouring_contigs))
 def test_finding_contig_ends_multiple_genes_same_side_of_closer_gene(self):
     """Check the contig_ends recorded by ends_of_contigs() for Contig1 and Contig2.

     Two outcomes are accepted; they differ only in the second coordinate
     stored for Contig1 (3 or 490).
     """
     filtered_graph = nx.Graph()
     filtered_graph.add_edges_from([('gene1', 'gene2'), ('gene3', 'gene2'),
                                    ('gene3', 'gene4'), ('gene4', 'gene5')])
     neighbouring_contigs = [[('Contig1', 'Contig2'), 1, ['gene3',
                                                          'gene4']]]
     gene_detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa',
         'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file')
     refine_contig_neighbours_object = RefineContigNeighbours(
         neighbouring_contigs, filtered_graph, filtered_graph,
         'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file',
         gene_detector, {
             'Contig1': ['', '', 1000],
             'Contig2': ['', '', 800]
         })
     refine_contig_neighbours_object.ends_of_contigs()
     acceptable_contig_ends = [
         {'Contig1': {'Contig2': [980, 3]}, 'Contig2': {'Contig1': [2, 301]}},
         {'Contig1': {'Contig2': [980, 490]}, 'Contig2': {'Contig1': [2, 301]}},
     ]
     # assertIn prints the actual contig_ends on failure, unlike
     # assertTrue over an "or" of two comparisons.
     self.assertIn(refine_contig_neighbours_object.contig_ends, acceptable_contig_ends)
 def test_add_to_contig_appearances(self):
     """Check add_to_contig_appearance() for gene1 against four starting dictionaries.

     Each case supplies the dictionary passed in, the integer third
     argument, and the dictionary expected back.
     """
     blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
     graph = nx.Graph()
     detector = GeneDetector(
         'madansi/tests/data/assembly_4_sequences.fa', blast_hits)
     neighbours = []
     contig_info = {
         'Contig1': ['', '', 1000],
         'Contig2': ['', '', 800],
         'Contig3': ['', '', 2000],
     }
     refiner = RefineContigNeighbours(
         neighbours, graph, graph, blast_hits, detector, contig_info)

     cases = (
         # Empty starting dictionary.
         ({}, 0, {'Contig1': [1, {0: ['gene1']}]}),
         # Contig already present, with a zero count.
         ({'Contig1': [0, {}]}, 1, {'Contig1': [1, {1: ['gene1']}]}),
         # New inner key added next to an existing one.
         ({'Contig1': [1, {0: ['gene2']}]}, 1,
          {'Contig1': [2, {0: ['gene2'], 1: ['gene1']}]}),
         # Inner key already present.
         ({'Contig1': [2, {1: ['gene2'], 2: ['gene3']}]}, 2,
          {'Contig1': [3, {1: ['gene2'], 2: ['gene1', 'gene3']}]}),
     )
     for starting, level, expected in cases:
         updated = refiner.add_to_contig_appearance('gene1', starting, level)
         self.assertDictEqual(updated, expected)
예제 #30
0
 def test_contigs_to_genes_four_hits(self):
     """Check which genes each contig receives from the four_blast_hits fixture.

     The mapping must contain exactly Contig1-Contig3; Contig1 should list
     gene1, gene2 and gene3, Contig2 should list geneA and Contig3 nothing.
     """
     assembly_path = 'madansi/tests/data/assembly.fa'
     blast_hits_path = 'madansi/tests/data/four_blast_hits'
     detector = GeneDetector(assembly_path, blast_hits_path)

     all_contigs = list(detector.contigs_to_genes().keys())
     self.assertCountEqual(all_contigs, ['Contig1', 'Contig2', 'Contig3'])

     genes_by_contig = [
         ('Contig1', ['gene1', 'gene2', 'gene3']),
         ('Contig2', ['geneA']),
         ('Contig3', []),
     ]
     # The mapping is rebuilt before each contig is inspected.
     for contig_name, wanted in genes_by_contig:
         contig = detector.contigs_to_genes()[contig_name]
         self.assertCountEqual(list(contig.gene_objects.keys()), wanted)
    def test_two_contigs_unconnected(self):
        """Check that the produced ordered contig graph is isomorphic to an empty graph.

        The gene graph built here has two components: gene1-gene2-gene3 and
        gene4-gene5.
        """
        blast_hits = 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file'
        edges = [
            ('gene1', 'gene2'),
            ('gene2', 'gene3'),
            ('gene4', 'gene5'),
        ]
        graph = nx.Graph()
        graph.add_edges_from(edges)
        detector = GeneDetector('madansi/tests/data/assembly.fa', blast_hits)

        empty_graph = nx.Graph()

        contig_info = {
            'Contig1': ['', '', 1000],
            'Contig2': ['', '', 800],
            'Contig3': ['', '', 2000],
        }
        producer = ProduceOrderedContigGraph(
            detector, graph, graph, blast_hits, contig_info)

        produced_graph = producer.produce_ordered_contig_graph()
        self.assertTrue(nx.is_isomorphic(empty_graph, produced_graph))
예제 #32
0
    def test_orientate_edges_one_edge(self):
        """Check the edge weight set by repeat_all_connected_components() on four one-edge graphs.

        Every graph starts with a single edge of weight 0.5; afterwards the
        weight is expected to be 1, 2, 3 or 4 depending on the contig pair.
        """
        gene_detector = GeneDetector(
            'madansi/tests/data/assembly_4_sequences.fa',
            'madansi/tests/data/seven_blast_hits')

        # (contig pair, weight expected after orientation)
        cases = [
            (('Contig2', 'Contig4'), 1),
            (('Contig2', 'Contig3'), 2),
            (('Contig1', 'Contig2'), 3),
            (('Contig1', 'Contig3'), 4),
        ]

        # Build all graphs first, as the original test did.
        contig_graphs = []
        for (first, second), _ in cases:
            contig_graph = nx.Graph()
            contig_graph.add_edge(first, second, weight=0.5)
            contig_graphs.append(contig_graph)

        # Orientate every graph before asserting anything.
        orientation_objects = []
        for contig_graph in contig_graphs:
            contig_orientation_object = ContigOrientation(
                contig_graph, gene_detector)
            contig_orientation_object.repeat_all_connected_components()
            orientation_objects.append(contig_orientation_object)

        for ((first, second), expected_weight), contig_orientation_object in zip(
                cases, orientation_objects):
            # graph[u][v] works on networkx 1.x and 2.x alike; the
            # Graph.edge attribute used previously was removed in 2.0.
            self.assertEqual(
                contig_orientation_object.contig_graph[first][second]['weight'],
                expected_weight)
예제 #33
0
 def test_contigs_to_genes_no_hits(self):
     """Check that an empty blast hits file still yields all three contigs, with no genes on Contig1."""
     detector = GeneDetector(
         'madansi/tests/data/assembly.fa',
         'madansi/tests/data/empty_file',
     )

     contig_names = list(detector.contigs_to_genes().keys())
     self.assertCountEqual(contig_names, ['Contig1', 'Contig2', 'Contig3'])

     contig1 = detector.contigs_to_genes()['Contig1']
     self.assertEqual(contig1.gene_objects, {})
예제 #34
0
 def test_parse_blast_fits_empty_file(self):
     """Check that parse_blast_hits() returns an empty list for an empty blast hits file."""
     detector = GeneDetector(
         'madansi/tests/data/empty_file.fa',
         'madansi/tests/data/empty_file',
     )
     parsed_hits = detector.parse_blast_hits()
     self.assertEqual(parsed_hits, [])
예제 #35
0
 def test_parse_blast_hits_one(self):
     """Tests that the first object obtained from parsing one blast hit is a BlastHit."""
     gene_detector = GeneDetector('madansi/tests/data/assembly.fa', 'madansi/tests/data/one_blast_hit')
     # assertIsInstance reports the actual type on failure, which
     # assertTrue(isinstance(...)) does not.
     self.assertIsInstance(gene_detector.parse_blast_hits()[0], BlastHit)