Ejemplo n.º 1
0
    def test_subpangraph_should_omit_edges_2(self):
        """Translating only seq1 must leave out node 1 and edges through it."""
        # Three unaligned nodes with ids 0..2 and bases A, C, C.
        nodes = [
            pNode.Node(node_id=nid(index), base=b(letter), aligned_to=None)
            for index, letter in enumerate('ACC')
        ]

        # seq1 skips node 1; seq2 visits all three nodes.
        visited_nodes = [('seq1', [0, 2]), ('seq2', [0, 1, 2])]
        sequences = {}
        for name, node_indices in visited_nodes:
            path = pSeq.SequencePath([nid(index) for index in node_indices])
            sequences[pSeq.SequenceID(name)] = pSeq.Sequence(
                pSeq.SequenceID(name),
                [path],
                pSeq.SequenceMetadata({'group': '1'}))
        poagraph = pPoagraph.Poagraph(nodes, sequences)

        # Only seq1 is handed to the translator.
        selected_ids = [pSeq.SequenceID('seq1')]
        translator = poa.PoagraphPOTranslator(poagraph, selected_ids)
        actual_po_content = translator.get_input_po_content()
        expected_po_content = (
            "VERSION=pangenome\n"
            "NAME=pangenome\n"
            "TITLE=pangenome\n"
            "LENGTH=2\n"
            "SOURCECOUNT=1\n"
            "SOURCENAME=seq1\n"
            "SOURCEINFO=2 0 100 -1 seq1\n"
            "a:S0\n"
            "c:L0S0"
        )

        self.assertEqual(expected_po_content, actual_po_content)
Ejemplo n.º 2
0
    def test_subpangraph_construction_full_graph(self):
        """A single sequence covering all five nodes yields a linear PO."""
        # Five unaligned nodes with ids 0..4 and bases A, A, C, A, T.
        nodes = [
            pNode.Node(node_id=nid(index), base=b(letter), aligned_to=None)
            for index, letter in enumerate('AACAT')
        ]

        # seq0 walks through every node in id order.
        full_path = pSeq.SequencePath([nid(index) for index in range(5)])
        seq0 = pSeq.Sequence(pSeq.SequenceID('seq0'),
                             [full_path],
                             pSeq.SequenceMetadata({'group': '1'}))
        sequences = {pSeq.SequenceID('seq0'): seq0}
        poagraph = pPoagraph.Poagraph(nodes, sequences)

        selected_ids = [pSeq.SequenceID('seq0')]
        translator = poa.PoagraphPOTranslator(poagraph, selected_ids)
        actual_po_content = translator.get_input_po_content()
        expected_po_content = (
            "VERSION=pangenome\n"
            "NAME=pangenome\n"
            "TITLE=pangenome\n"
            "LENGTH=5\n"
            "SOURCECOUNT=1\n"
            "SOURCENAME=seq0\n"
            "SOURCEINFO=5 0 100 -1 seq0\n"
            "a:S0\n"
            "a:L0S0\n"
            "c:L1S0\n"
            "a:L2S0\n"
            "t:L3S0"
        )
        self.assertEqual(expected_po_content, actual_po_content)
Ejemplo n.º 3
0
    def test_subpangraph_unfilled_nodes(self):
        """Nodes holding the unknown-base symbol are written to the PO as-is.

        Builds a nine-node graph in which nodes 3 and 4 carry the '?'
        placeholder base, translates both sequences and compares the
        produced PO text with the expected content.
        """
        symbol_for_unknown = '?'
        nodes = [
            # Nodes 0 and 1 are aligned to each other.
            pNode.Node(node_id=nid(0), base=b('A'), aligned_to=nid(1)),
            pNode.Node(node_id=nid(1), base=b('C'), aligned_to=nid(0)),
            pNode.Node(node_id=nid(2), base=b('G'), aligned_to=None),
            # Nodes 3 and 4 carry the placeholder instead of a real base.
            pNode.Node(node_id=nid(3),
                       base=b(symbol_for_unknown),
                       aligned_to=None),
            pNode.Node(node_id=nid(4),
                       base=b(symbol_for_unknown),
                       aligned_to=None),
            pNode.Node(node_id=nid(5), base=b('G'), aligned_to=None),
            pNode.Node(node_id=nid(6), base=b('C'), aligned_to=None),
            pNode.Node(node_id=nid(7), base=b('A'), aligned_to=None),
            # The ninth node is node 8: both sequence paths below end at
            # nid(8), and id 5 already belongs to the 'G' node above.
            pNode.Node(node_id=nid(8), base=b('T'), aligned_to=None)
        ]

        sequences = {
            pSeq.SequenceID('seq1'):
            pSeq.Sequence(pSeq.SequenceID('seq1'),
                          [pSeq.SequencePath([*map(nid, [0, 2, 3, 4, 7, 8])])],
                          pSeq.SequenceMetadata({'group': '1'})),
            pSeq.SequenceID('seq2'):
            pSeq.Sequence(pSeq.SequenceID('seq2'),
                          [pSeq.SequencePath([*map(nid, [1, 2, 5, 6, 7, 8])])],
                          pSeq.SequenceMetadata({'group': '1'}))
        }
        poagraph = pPoagraph.Poagraph(nodes, sequences)

        translator = poa.PoagraphPOTranslator(
            poagraph, [pSeq.SequenceID('seq1'),
                       pSeq.SequenceID('seq2')])
        actual_po_content = translator.get_input_po_content()
        expected_po_content = "VERSION=pangenome\n" \
                              "NAME=pangenome\n" \
                              "TITLE=pangenome\n" \
                              "LENGTH=9\n" \
                              "SOURCECOUNT=2\n" \
                              "SOURCENAME=seq1\n" \
                              "SOURCEINFO=6 0 100 -1 seq1\n" \
                              "SOURCENAME=seq2\n" \
                              "SOURCEINFO=6 1 100 -1 seq2\n" \
                              "a:S0A1\n" \
                              "c:S1A0\n" \
                              "g:L0L1S0S1\n" \
                              f"{symbol_for_unknown}:L2S0\n" \
                              f"{symbol_for_unknown}:L3S0\n" \
                              "g:L2S1\n" \
                              "c:L5S1\n" \
                              "a:L4L6S0S1\n" \
                              "t:L7S0S1"
        self.assertEqual(expected_po_content, actual_po_content)
Ejemplo n.º 4
0
    def test_2_consensuses_and_empty_sequences(self):
        """Compare the PO built for a graph with consensuses to test_2.po.

        The graph holds two sequences with paths, two sequences without any
        path and two consensus sequences that carry no metadata.
        """
        expected_po_content_path = Path(self.po_files_dir + "test_2.po")

        # (base, index of the node this one is aligned to, or None), listed
        # in node-id order.
        node_specs = [
            ('C', 1), ('T', 0), ('A', 3), ('G', 2), ('C', None),
            ('T', None), ('A', 7), ('T', 6), ('G', None),
        ]
        poagraph_nodes = [
            pNode.Node(node_id=nid(index),
                       base=bid(letter),
                       aligned_to=None if partner is None else nid(partner))
            for index, (letter, partner) in enumerate(node_specs)
        ]

        # (sequence name, node indices per path, whether metadata is set).
        sequence_specs = [
            ('seq0', [[0, 3, 4, 5, 6, 8]], True),
            ('seq1', [[1, 2, 4, 5, 7, 8]], True),
            ('seq2', [], True),
            ('seq3', [], True),
            ('CONSENS0', [[0, 3, 4, 5, 7, 8]], False),
            ('CONSENS1', [[1, 2, 4, 5, 6, 8]], False),
        ]
        poagraph_sequences = {}
        for name, path_indices, has_metadata in sequence_specs:
            seq_id = pSeq.SequenceID(name)
            paths = [
                pSeq.SequencePath([nid(index) for index in indices])
                for indices in path_indices
            ]
            if has_metadata:
                metadata = pSeq.SequenceMetadata({'group': '1'})
            else:
                metadata = None
            poagraph_sequences[pSeq.SequenceID(name)] = pSeq.Sequence(
                seq_id, paths, metadata)

        poagraph = pPoagraph.Poagraph(poagraph_nodes, poagraph_sequences)

        expected_po_content = pathtools.get_file_content(
            expected_po_content_path)
        actual_po_content = PangenomePO.poagraph_to_PangenomePO(poagraph)
        self.assertEqual(expected_po_content, actual_po_content)
Ejemplo n.º 5
0
    def setUp(self):
        """Store the fasta directory and an 18-node poagraph on the test case.

        seq1 has no paths, while seq2 and seq3 are each split into two
        separate paths.
        """
        self.fasta_dir = 'tests/output/fasta_files/'

        # (base, index of the node this one is aligned to, or None), listed
        # in node-id order.
        node_specs = [
            ('A', 1), ('G', 0),
            ('C', 3), ('G', 2),
            ('A', 5), ('T', 4),
            ('G', None), ('G', None),
            ('A', 9), ('C', 10), ('G', 11), ('T', 8),
            ('A', 13), ('C', 12),
            ('T', None),
            ('A', 16), ('C', 17), ('G', 15),
        ]
        poagraph_nodes = [
            pNode.Node(node_id=nid(index),
                       base=bid(letter),
                       aligned_to=None if partner is None else nid(partner))
            for index, (letter, partner) in enumerate(node_specs)
        ]

        # Sequence name paired with the node indices of each of its paths.
        path_specs = [
            ('seq0', [[0, 2, 4, 6, 7, 8, 12, 14, 16]]),
            ('seq1', []),
            ('seq2', [[3, 4, 6, 7, 10, 12], [14, 17]]),
            ('seq3', [[11], [13, 14, 15]]),
        ]
        poagraph_sequences = {}
        for name, path_indices in path_specs:
            paths = [
                pSeq.SequencePath([nid(index) for index in indices])
                for indices in path_indices
            ]
            poagraph_sequences[pSeq.SequenceID(name)] = pSeq.Sequence(
                pSeq.SequenceID(name),
                paths,
                pSeq.SequenceMetadata({'group': '1'}))

        self.poagraph = pPoagraph.Poagraph(poagraph_nodes, poagraph_sequences)
Ejemplo n.º 6
0
    def test_1_typical_pangraph(self):
        """Compare the PO built for an 18-node, 4-sequence graph to test_1.po."""
        expected_po_content_path = Path(self.po_files_dir + "test_1.po")

        # (base, index of the node this one is aligned to, or None), listed
        # in node-id order.
        node_specs = [
            ('A', 1), ('G', 0),
            ('C', 3), ('G', 2),
            ('A', 5), ('T', 4),
            ('G', None), ('G', None),
            ('A', 9), ('C', 10), ('G', 11), ('T', 8),
            ('A', 13), ('C', 12),
            ('T', None),
            ('A', 16), ('C', 17), ('G', 15),
        ]
        poagraph_nodes = []
        for index, (letter, partner) in enumerate(node_specs):
            poagraph_nodes.append(
                pNode.Node(
                    node_id=nid(index),
                    base=bid(letter),
                    aligned_to=nid(partner) if partner is not None else None))

        # Every sequence consists of exactly one path through the graph.
        path_specs = [
            ('seq0', [0, 2, 4, 6, 7, 8, 12, 14, 16]),
            ('seq1', [1, 2, 5, 6, 7, 9]),
            ('seq2', [3, 4, 6, 7, 10, 12, 14, 17]),
            ('seq3', [11, 13, 14, 15]),
        ]
        poagraph_sequences = {
            pSeq.SequenceID(name): pSeq.Sequence(
                pSeq.SequenceID(name),
                [pSeq.SequencePath(list(map(nid, node_indices)))],
                pSeq.SequenceMetadata({'group': '1'}))
            for name, node_indices in path_specs
        }

        poagraph = pPoagraph.Poagraph(poagraph_nodes, poagraph_sequences)

        expected_po_content = pathtools.get_file_content(
            expected_po_content_path)
        actual_po_content = PangenomePO.poagraph_to_PangenomePO(poagraph)
        self.assertEqual(expected_po_content, actual_po_content)
Ejemplo n.º 7
0
    def setUp(self):
        """Store a 15-node poagraph with four single-path sequences on self."""
        # (base, index of the node this one is aligned to, or None), listed
        # in node-id order.
        node_specs = [
            ('T', None),
            ('A', 2), ('G', 1),
            ('A', 4), ('C', 3),
            ('A', 6), ('C', 7), ('G', 8), ('T', 5),
            ('A', None),
            ('C', 11), ('T', 10),
            ('G', None),
            ('A', 14), ('C', 13),
        ]
        nodes = [
            pNode.Node(node_id=nid(index),
                       base=b(letter),
                       aligned_to=None if partner is None else nid(partner))
            for index, (letter, partner) in enumerate(node_specs)
        ]

        # Sequence name paired with the node indices of its only path.
        path_specs = [
            ('seq0', [0, 1, 3, 5, 9, 10, 13]),
            ('seq1', [1, 3, 6, 9, 11]),
            ('seq2', [2, 4, 7, 9, 11, 12]),
            ('seq3', [2, 4, 8, 9, 11, 12, 14]),
        ]
        sequences = {}
        for name, node_indices in path_specs:
            path = pSeq.SequencePath([nid(index) for index in node_indices])
            sequences[pSeq.SequenceID(name)] = pSeq.Sequence(
                pSeq.SequenceID(name),
                [path],
                pSeq.SequenceMetadata({'group': '1'}))

        self.poagraph = pPoagraph.Poagraph(nodes, sequences)