Beispiel #1
0
    def produce_ordered_contig_graph(self):
        contig_searching = ContigSearching(self.gene_detector,
                                           self.filtered_graph)
        contig_searching.expand_all_contigs()

        refine_neighbouring_contigs = RefineContigNeighbours(
            contig_searching.neighbouring_contigs, self.filtered_graph,
            self.unfiltered_graph, self.filtered_blast_hits_file,
            self.gene_detector, self.sequences)
        refine_neighbouring_contigs.refine_contig_neighbours()
        self.contig_ends = refine_neighbouring_contigs.ends_of_contigs()
        refined_neighbouring_contigs = refine_neighbouring_contigs.refined_neighbouring_contigs

        contig_graph_refined = ContigGraph(refined_neighbouring_contigs)
        contig_graph_unrefined = ContigGraph(
            contig_searching.neighbouring_contigs)
        graph_refined = contig_graph_refined.create_contig_subgraph()
        graph_unrefined = contig_graph_unrefined.create_contig_subgraph()

        iterate_joining_contig_components = IterateJoiningContigComponents(
            graph_unrefined)
        ordered_contig_graph = iterate_joining_contig_components.iterate_joining(
            graph_refined)

        return ordered_contig_graph
 def test_keep_all_connections_empty_list(self):
     """Tests an empty graph and initial list of neighbouring contigs"""
     filtered_graph = nx.Graph()
     neighbouring_contigs = []
     gene_detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', 'madansi/tests/data/empty_file' )
     refine_contig_neighbours_object = RefineContigNeighbours(neighbouring_contigs,filtered_graph,filtered_graph,'madansi/tests/data/empty_file',gene_detector, {})
     refine_contig_neighbours_object.genes = {}
     refine_contig_neighbours_object.refine_contig_neighbours()
     self.assertEqual(sorted(refine_contig_neighbours_object.refined_neighbouring_contigs), sorted(neighbouring_contigs))
 def test_single_gene_from_a_contig(self):
     """Tests when there are two contigs present, one with a single gene on it when other genes from the same contig are elsewhere in the graph"""
     filtered_graph = nx.Graph()
     filtered_graph.add_edges_from([('gene1', 'gene2'), ('gene2', 'gene3'), ('gene4', 'gene5')])
     neighbouring_contigs = [[('Contig1', 'Contig2'),1,['gene2', 'gene3']]]
     gene_detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', 'madansi/tests/data/refine_contig_neighbours_5_blast_hits_file' )
     
     refine_contig_neighbours_object = RefineContigNeighbours(neighbouring_contigs, filtered_graph,filtered_graph, 'madansi/tests/data/refine_contig_neighbours_5_blast_hits_file',gene_detector, {})
     refine_contig_neighbours_object.genes = {  'gene1':'Contig1', 'gene2':'Contig1', 'gene3':'Contig2',\
                                         'gene4':'Contig2', 'gene5':'Contig2'}
     refine_contig_neighbours_object.refine_contig_neighbours()
     self.assertEqual(sorted(refine_contig_neighbours_object.refined_neighbouring_contigs), [])
 def test_keep_one_connection(self):
     """Tests that one connection in neighbouring contigs is kept under refinement"""
     filtered_graph = nx.Graph()
     filtered_graph.add_edges_from([ ('gene1', 'gene2'), ('gene2', 'gene5'), ('gene5', 'gene6'),\
                                     ('gene6', 'gene3'), ('gene3', 'gene4')])
     neighbouring_contigs = [[('Contig1', 'Contig2'), 2, ['gene5', 'gene6']]]
     
     gene_detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', 'madansi/tests/data/empty_file' )
     
     refine_contig_neighbours_object = RefineContigNeighbours(neighbouring_contigs,filtered_graph,filtered_graph,'madansi/tests/data/empty_file',gene_detector, {})
     refine_contig_neighbours_object.genes = {'gene1':'Contig1', 'gene2':'Contig1', 'gene3':'Contig2', 'gene4':'Contig2'}
     refine_contig_neighbours_object.refine_contig_neighbours()
     self.assertEqual(sorted(refine_contig_neighbours_object.refined_neighbouring_contigs), sorted(neighbouring_contigs))
 def test_three_contigs_together(self):
     """Tests when there are three contigs within a small distance of the intersection points"""
     filtered_graph = nx.Graph()
     filtered_graph.add_edges_from([ ('gene1', 'gene2'), ('gene3', 'gene4'), ('gene5', 'gene6'),\
                                     ('gene7','gene2'), ('gene7','gene3'), ('gene7', 'gene5')])
                                     
     neighbouring_contigs =  [[('Contig1', 'Contig2'),1,['gene7']], [('Contig2', 'Contig3'),1,['gene7']],\
                             [('Contig1', 'Contig3'),1,['gene7']]]
                             
     gene_detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', 'madansi/tests/data/empty_file' )
     
     refine_contig_neighbours_object = RefineContigNeighbours(neighbouring_contigs, filtered_graph, filtered_graph,'madansi/tests/data/empty_file',gene_detector, {})
     refine_contig_neighbours_object.genes = {  'gene1':'Contig1', 'gene2':'Contig1', 'gene3':'Contig2',\
                                         'gene4':'Contig2', 'gene5':'Contig3', 'gene6':'Contig3'}
     refine_contig_neighbours_object.refine_contig_neighbours()
     self.assertEqual(sorted(refine_contig_neighbours_object.refined_neighbouring_contigs), [])
 def test_most_occurent_contig_not_in_neighbours(self):
     filtered_graph = nx.Graph()
     filtered_graph.add_edges_from([('gene1', 'gene2'), ('gene2', 'gene4'), ('gene4', 'gene5'), ('gene2', 'gene6'), ('gene6', 'gene7'), ('gene7', 'gene8'), ('gene4', 'gene8')])
     neighbouring_contigs = [[('Contig1', 'Contig2'),1,['gene2','gene4']]]
     gene_detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file')
     refine_contig_neighbours = RefineContigNeighbours(neighbouring_contigs, filtered_graph,filtered_graph, 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file', gene_detector, {})
     self.assertEqual(sorted(refine_contig_neighbours.refine_contig_neighbours()), [])
 def test_keep_three_cycle_two_equal_weights(self):
     """Tests that when there are three cycles with all connections having equal weight, these are preserved under refinement"""
     filtered_graph = nx.Graph()
     filtered_graph.add_edges_from([ ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),\
                                     ('gene4', 'gene5'), ('gene6', 'gene7'), ('gene7', 'gene8'),\
                                     ('gene8', 'gene1')])
     
     neighbouring_contigs = [[('Contig1', 'Contig2'), 1, ['gene3', 'gene4']],\
                             [('Contig2', 'Contig3'), 1, ['gene5', 'gene6']],\
                             [('Contig1', 'Contig3'), 1, ['gene8', 'gene1']]]
     
     gene_detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file')
     
     
     refine_contig_neighbours_object = RefineContigNeighbours(neighbouring_contigs, filtered_graph,filtered_graph, 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file', gene_detector, {})
     
     refine_contig_neighbours_object.contigs = { 'Contig1':{'gene1':None, 'gene2':None, 'gene3':None},\
                                                 'Contig2':{'gene4':None, 'gene5':None}              ,\
                                                 'Contig3':{'gene6':None, 'gene7':None, 'gene8':None}}
                                                 
     refine_contig_neighbours_object.genes = {   'gene1':'Contig1', 'gene2':'Contig1', 'gene3':'Contig1',\
                                                 'gene4':'Contig2', 'gene5':'Contig2', 'gene6':'Contig3',\
                                                 'gene7':'Contig3', 'gene8':'Contig3'}
     
     self.assertEqual(sorted(refine_contig_neighbours_object.refine_contig_neighbours()), sorted(neighbouring_contigs))
 def test_keep_two_connections(self):
    """Tests that two connections are preserved under refinement"""
    filtered_graph = nx.Graph()
    filtered_graph.add_edges_from([  ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),\
                                     ('gene4', 'gene5'), ('gene5', 'gene6'), ('gene6', 'gene7'),\
                                     ('gene7', 'gene8'), ('gene8', 'gene9'), ('gene9', 'gene10'),\
                                     ('gene10', 'gene11'), ('gene11', 'gene12'), ('gene12', 'gene13')])
                                     
    neighbouring_contigs = [[('Contig1', 'Contig2'),1, ['gene4']], [('Contig2', 'Contig3'),2,['gene8','gene9']]]
    gene_detector = GeneDetector('madansi/tests/data/assembly_7_sequences.fa', 'madansi/tests/data/blast_hits_13' )
    
    refine_contig_neighbours_object = RefineContigNeighbours(neighbouring_contigs,filtered_graph,filtered_graph,'madansi/tests/data/blast_hits_13', gene_detector, {})
    refine_contig_neighbours_object.genes = {   'gene1':'Contig1', 'gene2':'Contig1', 'gene3':'Contig1',\
                                         'gene5':'Contig2', 'gene6':'Contig2', 'gene7':'Contig2', \
                                         'gene10':'Contig3', 'gene11':'Contig3', 'gene13':'Contig3',\
                                         'gene12':'Contig3'}
    refine_contig_neighbours_object.refine_contig_neighbours()
    self.assertEqual(sorted(refine_contig_neighbours_object.refined_neighbouring_contigs), sorted(neighbouring_contigs))                        
 def test_loop_of_genes_between_contigs(self):
     filtered_graph = nx.Graph()
     filtered_graph.add_edges_from([('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'geneA'), ('geneA', 'geneB'), ('geneB', 'gene4'), ('gene4', 'gene5'),\
                                     ('gene3', 'gene6'), ('gene6', 'gene7'), ('gene7', 'gene8'), ('gene8', 'gene9'), ('gene9', 'geneB')])
     neighbouring_contigs = [[('Contig1', 'Contig2'),2, ['geneA', 'geneB']], [('Contig2', 'Contig3'), 1, ['geneB']], [('Contig1', 'Contig3'),1,['geneA']]]
     gene_detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file')
     refine_contig_neighbours = RefineContigNeighbours(neighbouring_contigs, filtered_graph, filtered_graph,'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file', gene_detector, {})
     expected_neighbours = [[('Contig2', 'Contig3'), 1, ['geneB']], [('Contig1', 'Contig3'),1,['geneA']]]
     self.assertEqual(sorted(refine_contig_neighbours.refine_contig_neighbours()), sorted(expected_neighbours))
 def produce_ordered_contig_graph(self):
     contig_searching = ContigSearching(self.gene_detector, self.filtered_graph)
     contig_searching.expand_all_contigs()
  
     refine_neighbouring_contigs = RefineContigNeighbours(contig_searching.neighbouring_contigs, self.filtered_graph, self.unfiltered_graph, self.filtered_blast_hits_file, self.gene_detector, self.sequences)
     refine_neighbouring_contigs.refine_contig_neighbours()        
     self.contig_ends            = refine_neighbouring_contigs.ends_of_contigs()
     refined_neighbouring_contigs= refine_neighbouring_contigs.refined_neighbouring_contigs
     
     contig_graph_refined    = ContigGraph(refined_neighbouring_contigs)
     contig_graph_unrefined  = ContigGraph(contig_searching.neighbouring_contigs)
     graph_refined           = contig_graph_refined.create_contig_subgraph()
     graph_unrefined         = contig_graph_unrefined.create_contig_subgraph()
     
     iterate_joining_contig_components   = IterateJoiningContigComponents(graph_unrefined)
     ordered_contig_graph                = iterate_joining_contig_components.iterate_joining(graph_refined)
     
     return ordered_contig_graph
 def test_contig_joins_in_middle(self):
     """Tests when an intersection is found in the case where one end of the contig is closest to the middle of a second"""
     filtered_graph = nx.Graph()
     filtered_graph.add_edges_from([ ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),\
                                     ('gene4', 'gene5'), ('gene5', 'gene6') ,('gene3', 'gene7'),\
                                     ('gene7', 'gene8')])
                                     
     neighbouring_contigs = [[('Contig1', 'Contig2'),1,['gene3', 'gene7']]]
     gene_detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', 'madansi/tests/data/empty_file' )
     
     refine_contig_neighbours_object = RefineContigNeighbours(neighbouring_contigs, filtered_graph,filtered_graph, 'madansi/tests/data/empty_file',gene_detector, {})
     refine_contig_neighbours_object.genes = {  'gene1':'Contig1', 'gene2':'Contig1', 'gene3':'Contig1', 'gene4':'Contig1',\
                                         'gene5':'Contig1', 'gene6':'Contig1', 'gene7':'Contig2', 'gene8':'Contig2'}
 
     self.assertEqual(sorted(refine_contig_neighbours_object.refine_contig_neighbours()), [])
 def test_short_contig_in_middle(self):
     """Tests the case when there is a contig with only two genes between longer ones, we should find that the connection that needs more iterations is removed"""
     filtered_graph = nx.Graph()
     filtered_graph.add_edges_from([ ('gene1', 'gene2'), ('gene2', 'gene3'), ('gene3', 'gene4'),\
                                     ('gene4', 'gene5'), ('gene5', 'gene6'), ('gene6', 'gene7'),\
                                     ('gene7', 'gene8')])
                                     
     neighbouring_contigs = [[('Contig1', 'Contig2'),1,['gene3','gene4']],\
                             [('Contig2', 'Contig3'),1,['gene5', 'gene6']],\
                             [('Contig1', 'Contig3'),2,['gene4', 'gene5']]]
                             
     gene_detector = GeneDetector('madansi/tests/data/assembly_4_sequences.fa', 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file' )
     
     refine_contig_neighbours_object = RefineContigNeighbours(neighbouring_contigs, filtered_graph, filtered_graph, 'madansi/tests/data/refine_contig_neighbours_9_blast_hits_file', gene_detector, {})
     refine_contig_neighbours_object.contigs = { 'Contig1': {'gene1':None, 'gene2':None, 'gene3':None},\
                                                 'Contig2':{'gene4':None, 'gene5':None},\
                                                 'Contig3':{'gene6':None, 'gene7':None, 'gene8':None}}
                                         
     refine_contig_neighbours_object.genes = {   'gene1':'Contig1', 'gene2':'Contig1', 'gene3':'Contig1',\
                                                 'gene4':'Contig2', 'gene5':'Contig2', 'gene6':'Contig3',\
                                                 'gene7':'Contig3', 'gene8':'Contig3'}
 
     self.assertEqual(sorted(refine_contig_neighbours_object.refine_contig_neighbours()), sorted([[('Contig1', 'Contig2'),1,['gene3', 'gene4']], [('Contig2', 'Contig3'),1,['gene5', 'gene6']]]))