Beispiel #1
0
    def test_y_graph_with_one_link_returns_one_node(self):
        """Check that a linked traverser exposes only one branch of a Y graph.

        Node 0 ('AAA') forks to node 1 ('AAC') and node 2 ('AAG'), and the
        single link 'F 1 1 C' is registered for kmer 'AAA'.  The test asserts
        that all three nodes are contained in the traverser, that node 0
        yields only node 1, and that node 1 yields nothing.
        """
        # given: one link registered for the kmer of the fork node
        links = (
            LinksBuilder()
            .with_link_for_kmer('F 1 1 C', 'AAA')
            .build()
        )

        # a Y-shaped graph: node 0 forks to nodes 1 and 2
        graph_builder = UnitigBuilder()
        for node_id, unitig in enumerate(('AAA', 'AAC', 'AAG')):
            graph_builder.add_node(node_id, unitig)
        for child in (1, 2):
            graph_builder.add_edge(0, child)
        unitigs = graph_builder.build()

        # when: the link walker starts at unitig 0 with kmer size 3
        link_walker = UnitigLinkWalker.from_links_unitigs_kmer_size_unitig(
            links, unitigs, 3, 0)
        traverser = LinkedGraphTraverser.from_graph_and_link_walker(
            unitigs, link_walker)

        # then
        for node_id in (0, 1, 2):
            assert node_id in traverser
        assert list(traverser[0]) == [1]
        assert list(traverser[1]) == []
Beispiel #2
0
    def test_two_ys_with_one_link_returns_both_nodes_the_second_time(self):
        """Check walker successors across two consecutive forks with one link.

        The graph forks at node 0 (to nodes 1 and 2), rejoins at node 3 and
        forks again (to nodes 4 and 5).  Only kmer 'AAA' carries a link
        ('F 1 1 C').  The test asserts that the walker offers just node 1 at
        the first fork, node 3 afterwards, both nodes 4 and 5 at the second
        fork, and nothing once node 5 has been chosen.
        """
        # given
        links = (
            LinksBuilder()
            .with_link_for_kmer('F 1 1 C', 'AAA')
            .build()
        )

        graph_builder = UnitigBuilder()
        node_unitigs = ('AAA', 'AACCC', 'AAGCC', 'CCC', 'CCA', 'CCT')
        for node_id, unitig in enumerate(node_unitigs):
            graph_builder.add_node(node_id, unitig)
        edges = ((0, 1), (0, 2), (1, 3), (2, 3), (3, 4), (3, 5))
        for tail, head in edges:
            graph_builder.add_edge(tail, head)
        unitigs = graph_builder.build()

        # when: the walker starts at unitig 0 with kmer size 3
        walker = UnitigLinkWalker.from_links_unitigs_kmer_size_unitig(
            links, unitigs, 3, 0)

        # then: first fork is restricted to node 1
        assert list(walker.successors()) == [1]
        walker.choose(1)
        assert list(walker.successors()) == [3]
        walker.choose(3)
        # second fork: both branches are offered
        assert sorted(walker.successors()) == [4, 5]
        walker.choose(5)
        assert list(walker.successors()) == []
Beispiel #3
0
    def test_y_graph_with_one_link_returns_one_node(self):
        """Check walker behaviour on a Y graph carrying a single link.

        Node 0 ('AAA') forks to node 1 ('AAC') and node 2 ('AAG'); the link
        'F 1 1 C' is registered for kmer 'AAA'.  The test asserts that:

        * both ``link_successors()`` and ``successors()`` offer only node 1,
        * choosing node 2 raises ``KeyError``,
        * after choosing node 1, ``link_successors()`` raises ``ValueError``
          and ``successors()`` is empty.
        """
        # given
        links = LinksBuilder() \
            .with_link_for_kmer('F 1 1 C', 'AAA') \
            .build()

        b = UnitigBuilder()
        b.add_node(0, 'AAA')
        b.add_node(1, 'AAC')
        b.add_node(2, 'AAG')

        b.add_edge(0, 1)
        b.add_edge(0, 2)
        unitigs = b.build()

        # when: the walker starts at unitig 0 with kmer size 3
        walker = UnitigLinkWalker.from_links_unitigs_kmer_size_unitig(
            links, unitigs, 3, 0)

        # then
        assert [1] == list(walker.link_successors())
        assert [1] == list(walker.successors())
        # node 2 is not an allowed choice from the fork
        with pytest.raises(KeyError):
            walker.choose(2)
        walker.choose(1)
        # (a leftover debugging print of the graph successors was removed
        # here; it asserted nothing and only polluted the test output)
        with pytest.raises(ValueError):
            list(walker.link_successors())
        assert [] == list(walker.successors())
Beispiel #4
0
    def all_simple_paths(self, extra_incoming_node=None, links=None):
        """Yield a ``SeqRecord`` for every simple path through the unitig graph.

        ``self.graph`` is collapsed into unitigs, copied into an ``nx.DiGraph``
        and relabelled with integer node ids.  For every source node returned
        by ``in_nodes_of`` the simple paths to the nodes returned by
        ``out_nodes_of`` are enumerated and converted to contig sequences.  A
        source that is itself one of the out nodes is emitted directly as its
        own unitig.

        Args:
            extra_incoming_node: Optional node of ``self.graph``.  When truthy,
                every edge leading into it is removed for each color in
                ``self.graph.graph['colors']`` before collapsing.  Note that
                this mutates ``self.graph``.
            links: Optional links object.  When not ``None``, paths are
                enumerated over a ``LinkedGraphTraverser`` driven by a
                ``UnitigLinkWalker`` started at the source node instead of
                over the plain unitig graph.

        Yields:
            ``SeqRecord`` objects whose ``id`` is a running record index
            (as a string) and whose ``description`` is empty.
        """
        if not isinstance(self.graph, nx.Graph):
            assert self.graph.is_consistent()
        if extra_incoming_node:
            # Snapshot the predecessors first: removing edges while iterating
            # the live predecessor view can invalidate the iteration.
            for neighbor in list(self.graph.pred[extra_incoming_node]):
                for color in self.graph.graph['colors']:
                    self.graph.remove_edge(neighbor, extra_incoming_node, color)
        unitig_graph = UnitigCollapser(self.graph) \
            .collapse_kmer_unitigs() \
            .unitig_graph
        unitig_graph = nx.DiGraph(unitig_graph)
        unitig_graph = nx.convert_node_labels_to_integers(unitig_graph)
        path_converter = UnitigGraphPathConverter.from_unitig_graph(unitig_graph)

        record_idx = 0
        in_nodes = sorted(in_nodes_of(unitig_graph))
        logger.info('Found %s incoming tip nodes', len(in_nodes))
        # Only membership tests and the target set are needed, so no ordering.
        out_nodes = set(out_nodes_of(unitig_graph))
        logger.info('Found %s outgoing tip nodes', len(out_nodes))
        for sidx, source in enumerate(in_nodes):
            if source in out_nodes:
                # The source is also a target: emit its unitig as a
                # stand-alone record instead of searching for paths.
                logger.info('Incoming node %s; %s outgoing nodes; Path number %s',
                            sidx,
                            len(out_nodes),
                            record_idx)
                # ``Graph.nodes`` replaces the ``Graph.node`` attribute that
                # was removed from networkx.
                yield SeqRecord(Seq(unitig_graph.nodes[source]['unitig']),
                                id=str(record_idx),
                                description='')
                record_idx += 1
                continue

            if links is None:
                search_graph = unitig_graph
            else:
                # Restrict traversal with a link walker that starts at
                # this source node.
                search_graph = LinkedGraphTraverser.from_graph_and_link_walker(
                    unitig_graph,
                    UnitigLinkWalker.from_links_unitigs_kmer_size_unitig(
                        links,
                        unitig_graph,
                        unitig_graph.graph['kmer_size'],
                        source
                    )
                )
            paths = nx.all_simple_paths(search_graph,
                                        source,
                                        out_nodes,
                                        cutoff=len(self.graph) - 1)

            for pidx, path in enumerate(paths):
                # Periodic progress report for sources with very many paths.
                if pidx % 100000 == 0:
                    logger.info('Incoming node %s; %s outgoing nodes; Path number %s', sidx,
                                len(out_nodes), record_idx)
                yield SeqRecord(
                    Seq(path_converter.to_contig(path)),
                    id=str(record_idx),
                    description='')
                record_idx += 1
Beispiel #5
0
    def test_raises_when_link_does_match_graph(self):
        """Check that consuming link successors raises for a mismatched link.

        The link 'F 1 1 T' is registered for kmer 'AAA', while the graph's
        node 0 ('AAA') only has the successors 'AAC' and 'AAG'.  The test
        expects ``link_successors()`` to raise a ``ValueError`` whose message
        matches 'Links do not appear to match unitigs'.

        NOTE(review): the test name reads "does match"; it presumably means
        "does not match" -- confirm before renaming.
        """
        # given
        links = (
            LinksBuilder()
            .with_link_for_kmer('F 1 1 T', 'AAA')
            .build()
        )

        graph_builder = UnitigBuilder()
        for node_id, unitig in enumerate(('AAA', 'AAC', 'AAG')):
            graph_builder.add_node(node_id, unitig)
        for child in (1, 2):
            graph_builder.add_edge(0, child)
        unitigs = graph_builder.build()

        # when: the walker starts at unitig 0 with kmer size 3
        walker = UnitigLinkWalker.from_links_unitigs_kmer_size_unitig(
            links, unitigs, 3, 0)

        # then: the error surfaces once the successors are consumed
        expected_message = 'Links do not appear to match unitigs'
        with pytest.raises(ValueError, match=expected_message):
            list(walker.link_successors())
Beispiel #6
0
    def test_two_ys_with_two_links_returns_one_and_two_nodes(self):
        """Check link successors across two forks when two links are present.

        The graph forks at node 0 (to nodes 1 and 2), rejoins at node 3 and
        forks again (to nodes 4 and 5).  Link 'F 2 1 CT' is registered for
        kmer 'AAA' and link 'F 1 1 A' for kmer 'CCC'.  The test asserts that
        ``link_successors()`` offers node 1 at the first fork, nodes 4 and 5
        at the second fork, and raises ``ValueError`` after choosing node 1
        and after choosing node 5.
        """
        # given
        links = (
            LinksBuilder()
            .with_link_for_kmer('F 2 1 CT', 'AAA')
            .with_link_for_kmer('F 1 1 A', 'CCC')
            .build()
        )

        graph_builder = UnitigBuilder()
        node_unitigs = ('AAA', 'AACCC', 'AAGCC', 'CCC', 'CCA', 'CCT')
        for node_id, unitig in enumerate(node_unitigs):
            graph_builder.add_node(node_id, unitig)
        edges = ((0, 1), (0, 2), (1, 3), (2, 3), (3, 4), (3, 5))
        for tail, head in edges:
            graph_builder.add_edge(tail, head)
        unitigs = graph_builder.build()

        # when: the walker starts at unitig 0 with kmer size 3
        walker = UnitigLinkWalker.from_links_unitigs_kmer_size_unitig(
            links, unitigs, 3, 0)

        # then: first fork
        assert list(walker.link_successors()) == [1]
        walker.choose(1)
        with pytest.raises(ValueError):
            list(walker.link_successors())
        walker.choose(3)
        # second fork: both branches are offered
        assert sorted(walker.link_successors()) == [4, 5]
        walker.choose(5)
        with pytest.raises(ValueError):
            list(walker.link_successors())
Beispiel #7
0
    def test_y_graph_with_one_link_returns_copy_of_walker(self):
        """Check that advancing a copied walker leaves the original untouched.

        A walker is built on a Y graph (node 0 forking to nodes 1 and 2) with
        the link 'F 1 1 C' on kmer 'AAA'.  After ``copy.copy`` and a
        ``choose(1)`` on the copy, the test asserts that the original still
        offers node 1 while the copy offers no successors.
        """
        # given
        links = (
            LinksBuilder()
            .with_link_for_kmer('F 1 1 C', 'AAA')
            .build()
        )

        graph_builder = UnitigBuilder()
        for node_id, unitig in enumerate(('AAA', 'AAC', 'AAG')):
            graph_builder.add_node(node_id, unitig)
        for child in (1, 2):
            graph_builder.add_edge(0, child)
        unitigs = graph_builder.build()
        original_walker = UnitigLinkWalker.from_links_unitigs_kmer_size_unitig(
            links, unitigs, 3, 0)

        # when: only the copy is advanced
        copied_walker = copy.copy(original_walker)
        copied_walker.choose(1)

        # then
        assert list(original_walker.successors()) == [1]
        assert list(copied_walker.successors()) == []