def setUp(self):
        """Create ``self.poagraph``: 15 nodes traversed by sequences seq0-seq3.

        Nodes and sequences are described by the compact tables below; every
        sequence has a single path and the metadata ``{'group': '1'}``.
        """
        # (node index, base letter, index of the node it is aligned to or None)
        node_rows = [
            (0, 'T', None),
            (1, 'A', 2),
            (2, 'G', 1),
            (3, 'A', 4),
            (4, 'C', 3),
            # nodes 5-8 point at each other in a ring: 5 -> 6 -> 7 -> 8 -> 5
            (5, 'A', 6),
            (6, 'C', 7),
            (7, 'G', 8),
            (8, 'T', 5),
            (9, 'A', None),
            (10, 'C', 11),
            (11, 'T', 10),
            (12, 'G', None),
            (13, 'A', 14),
            (14, 'C', 13),
        ]
        nodes = [
            graph.Node(node_id=nid(index),
                       base=b(letter),
                       aligned_to=None if partner is None else nid(partner))
            for index, letter, partner in node_rows
        ]

        # (sequence name, node indices visited by its only path)
        path_rows = [
            ('seq0', [0, 1, 3, 5, 9, 10, 13]),
            ('seq1', [1, 3, 6, 9, 11]),
            ('seq2', [2, 4, 7, 9, 11, 12]),
            ('seq3', [2, 4, 8, 9, 11, 12, 14]),
        ]
        sequences = {}
        for name, indices in path_rows:
            sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(i) for i in indices])],
                graph.SequenceMetadata({'group': '1'}))

        self.poagraph = graph.Poagraph(nodes, sequences)
예제 #2
0
def _add_node_do_sequence(sequence: graph.Sequence, node_id: graph.NodeID) -> \
        graph.Sequence:
    """Return a new sequence with ``node_id`` appended to its last path.

    Args:
        sequence: Sequence to extend; it is not modified.
        node_id: ID of the node to append.

    Returns:
        A ``graph.Sequence`` with the same ``seqid`` and ``seqmetadata``.
        If ``sequence`` has no paths, the result has one path holding only
        ``node_id``; otherwise all paths but the last are kept as they are
        and the last one is extended by ``node_id``.

    NOTE(review): assumes ``graph.SeqPath`` is list-like (supports ``+``),
    as the path concatenation below requires - confirm against ``graph``.
    """
    if sequence.paths:
        updated_path = graph.SeqPath(sequence.paths[-1] +
                                     graph.SeqPath([node_id]))
        # Keep the earlier paths as separate entries. Concatenating them
        # with ``updated_path`` itself (instead of with a one-element list)
        # would merge paths and node IDs into a single malformed path
        # whenever the sequence has more than one path.
        newpaths = [*sequence.paths[:-1], updated_path]
    else:
        newpaths = [graph.SeqPath([node_id])]
    return graph.Sequence(sequence.seqid, newpaths, sequence.seqmetadata)
    def test_02_seq_starts_in_second_block(self):
        """Poagraph built from the DAG-MAF fixture must equal the expected one."""
        maf_path = self.maf_files_dir.joinpath(
            "test_2_seq_starts_in_second_block.maf")

        # (node index, base letter, aligned node index or None, block index)
        node_rows = [
            (0, 'C', None, 0),
            (1, 'T', None, 0),
            (2, 'G', None, 0),

            (3, 'T', None, 1),

            (4, 'G', 5, 2),
            (5, 'T', 4, 2),
            (6, 'A', None, 2),
            (7, 'A', None, 2),
            (8, 'C', None, 2),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=graph.Base(letter),
                       aligned_to=None if partner is None else nid(partner),
                       block_id=bid(block))
            for index, letter, partner, block in node_rows
        ]

        # (sequence name, node indices of each path, metadata group)
        sequence_rows = [
            ('seq0', [[1, 2, 3]], '1'),
            ('seq1', [[0, 5, 7]], '1'),
            ('seq2', [[3, 4, 6, 8]], '2'),
            ('seq3', [], '2'),  # present in the expectation, but with no path
        ]
        expected_sequences = {}
        for name, paths, group in sequence_rows:
            seq_paths = [graph.SeqPath([nid(i) for i in path])
                         for path in paths]
            expected_sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name),
                seq_paths,
                graph.SequenceMetadata({'group': group}))

        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        maf = msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)
        # Only the first element of the returned pair is checked here.
        actual_poagraph, _ = builder.build_from_dagmaf(maf,
                                                       self.fasta_provider,
                                                       self.metadatacsv)

        self.assertEqual(expected_poagraph, actual_poagraph)
    def test_08_reversed_block(self):
        """Poagraph built from ``test_8_reversed_block.maf`` must match."""
        maf_path = self.maf_files_dir.joinpath("test_8_reversed_block.maf")

        # (node index, base letter, aligned node index or None)
        node_rows = [
            (0, 'C', None),
            (1, 'A', None),
            (2, 'T', None),
            # next block is reversed because it was converted to dag
            (3, 'G', None),
            (4, 'G', None),
            (5, 'A', 6),
            (6, 'G', 5),
            (7, 'A', None),
            (8, 'G', None),
            (9, 'T', None),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=graph.Base(letter),
                       aligned_to=None if partner is None else nid(partner))
            for index, letter, partner in node_rows
        ]

        # (sequence name, node indices of each path, metadata group)
        sequence_rows = [
            ('seq0', [], '1'),
            ('seq1', [[0, 1, 3, 4, 5, 7, 8, 9]], '1'),
            ('seq2', [[0, 1, 2, 3, 4, 6, 7, 8, 9]], '2'),
            ('seq3', [[0, 1, 2, 3, 4, 6, 7, 9]], '2'),
        ]
        expected_sequences = {}
        for name, paths, group in sequence_rows:
            seq_paths = [graph.SeqPath([nid(i) for i in path])
                         for path in paths]
            expected_sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name),
                seq_paths,
                graph.SequenceMetadata({'group': group}))

        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        maf = msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)
        # Only the first element of the returned pair is checked here.
        actual_poagraph, _ = builder.build_from_dagmaf(maf,
                                                       self.fasta_provider,
                                                       self.metadatacsv)

        self.assertEqual(expected_poagraph, actual_poagraph)
예제 #5
0
def _init_sequences(sequences_info: Dict[int, POSequenceInfo],
                    metadata: Optional[msa.MetadataCSV]) -> \
        Dict[msa.SequenceID, graph.Sequence]:
    """Create a path-less ``graph.Sequence`` for every known sequence ID.

    The IDs are the union of the ``name`` of each value in
    ``sequences_info`` and, when ``metadata`` is truthy, the IDs returned by
    ``metadata.get_all_sequences_ids()``.

    Args:
        sequences_info: Mapping whose values expose a ``name`` attribute.
        metadata: Optional metadata source; when falsy, every sequence gets
            an empty dict as its metadata.

    Returns:
        Dict from sequence ID to a ``graph.Sequence`` with ``paths=[]``.
    """
    if metadata:
        ids_from_metadata = metadata.get_all_sequences_ids()
    else:
        ids_from_metadata = []
    ids_from_po = [info.name for info in sequences_info.values()]

    sequences = {}
    # The set removes IDs that appear in both sources.
    for seq_id in set(ids_from_po + ids_from_metadata):
        if metadata:
            seq_metadata = metadata.get_sequence_metadata(seq_id)
        else:
            seq_metadata = {}
        sequences[seq_id] = graph.Sequence(seqid=seq_id,
                                           paths=[],
                                           seqmetadata=seq_metadata)
    return sequences
예제 #6
0
def _init_sequences(sequences_in_dagmaf: List[msa.SequenceID],
                    metadata: Optional[msa.MetadataCSV]) -> \
        Dict[msa.SequenceID, graph.Sequence]:
    """Create a path-less ``graph.Sequence`` for every known sequence ID.

    The IDs are the union of ``sequences_in_dagmaf`` and, when ``metadata``
    is truthy, the IDs returned by ``metadata.get_all_sequences_ids()``.

    Args:
        sequences_in_dagmaf: Sequence IDs taken from the DAG-MAF input.
        metadata: Optional metadata source; when falsy, every sequence gets
            an empty dict as its metadata.

    Returns:
        Dict from sequence ID to a ``graph.Sequence`` with ``paths=[]``.
    """
    if metadata:
        ids_from_metadata = metadata.get_all_sequences_ids()
    else:
        ids_from_metadata = []

    sequences = {}
    # The set removes IDs that appear in both sources.
    for seq_id in set(sequences_in_dagmaf + ids_from_metadata):
        if metadata:
            seq_metadata = metadata.get_sequence_metadata(seq_id)
        else:
            seq_metadata = {}
        sequences[seq_id] = graph.Sequence(seqid=seq_id,
                                           paths=[],
                                           seqmetadata=seq_metadata)
    return sequences
    def test_subpoagraph_should_omit_in_nodes_and_aligned_nodes(self):
        """PO content generated for seq2 alone must list only seq2's nodes."""
        # original poagraph
        # (node index, base letter, aligned node index or None)
        node_rows = [
            (0, 'A', None),
            (1, 'C', 2),
            (2, 'T', 1),
            (3, 'G', None),
        ]
        nodes = [
            graph.Node(node_id=nid(index),
                       base=b(letter),
                       aligned_to=None if partner is None else nid(partner))
            for index, letter, partner in node_rows
        ]

        # (sequence name, node indices visited by its only path)
        path_rows = [
            ('seq1', [0, 1, 3]),
            ('seq2', [0, 2, 3]),
        ]
        sequences = {}
        for name, indices in path_rows:
            sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name),
                [graph.SeqPath([nid(i) for i in indices])],
                graph.SequenceMetadata({'group': '1'}))
        poagraph = graph.Poagraph(nodes, sequences)

        # Translate the graph restricted to seq2 only.
        selected_ids = [msa.SequenceID('seq2')]
        translator = poa._PoagraphPOTranslator(poagraph, selected_ids)
        actual_po_content = translator.get_input_po_content()

        expected_po_content = ("VERSION=pangenome\n"
                               "NAME=pangenome\n"
                               "TITLE=pangenome\n"
                               "LENGTH=3\n"
                               "SOURCECOUNT=1\n"
                               "SOURCENAME=seq2\n"
                               "SOURCEINFO=3 0 100 -1 seq2\n"
                               "a:S0\n"
                               "t:L0S0\n"
                               "g:L1S0")

        self.assertEqual(expected_po_content, actual_po_content)
예제 #8
0
def _init_poagraph(alignment: _ParsedMaf,
                   metadata: Optional[msa.MetadataCSV]) -> \
        Tuple[List[graph.Node], Dict[msa.SequenceID, graph.Sequence]]:
    """Return the starting state of a poagraph: no nodes, path-less sequences.

    Sequence IDs are the union of those returned by
    ``_get_sequences_ids(alignment)`` and, when ``metadata`` is truthy, those
    returned by ``metadata.get_all_sequences_ids()``.

    Args:
        alignment: Parsed MAF passed on to ``_get_sequences_ids``.
        metadata: Optional metadata source; when falsy, every sequence gets
            an empty dict as its metadata.

    Returns:
        A pair ``([], sequences)`` where ``sequences`` maps each sequence ID
        to a ``graph.Sequence`` with ``paths=[]``.
    """
    ids_from_maf = _get_sequences_ids(alignment)
    if metadata:
        ids_from_metadata = metadata.get_all_sequences_ids()
    else:
        ids_from_metadata = []

    sequences = {}
    # The set removes IDs that appear in both sources.
    for seq_id in set(ids_from_maf + ids_from_metadata):
        if metadata:
            seq_metadata = metadata.get_sequence_metadata(seq_id)
        else:
            seq_metadata = {}
        sequences[seq_id] = graph.Sequence(seqid=seq_id,
                                           paths=[],
                                           seqmetadata=seq_metadata)

    return [], sequences
    def test_2_consensuses_and_empty_sequences(self):
        """Poagraph read from ``test_2.po`` must equal the expected one."""
        po_path = self.po_files_dir.joinpath("test_2.po")

        # (node index, base letter, aligned node index or None)
        node_rows = [
            (0, 'C', 1),
            (1, 'T', 0),
            (2, 'A', 3),
            (3, 'G', 2),
            (4, 'C', None),
            (5, 'T', None),
            (6, 'A', 7),
            (7, 'T', 6),
            (8, 'G', None),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=bid(letter),
                       aligned_to=None if partner is None else nid(partner))
            for index, letter, partner in node_rows
        ]

        # (sequence name, node indices of each path, metadata content)
        sequence_rows = [
            ('seq0', [[0, 3, 4, 5, 6, 8]], {'group': '1'}),
            ('seq1', [[1, 2, 4, 5, 7, 8]], {'group': '1'}),
            ('seq2', [], {'group': '2'}),
            ('seq3', [], {'group': '2'}),
            # the consensus entries are expected with empty metadata
            ('CONSENS0', [[0, 3, 4, 5, 7, 8]], {}),
            ('CONSENS1', [[1, 2, 4, 5, 6, 8]], {}),
        ]
        expected_sequences = {}
        for name, paths, meta in sequence_rows:
            seq_paths = [graph.SeqPath([nid(i) for i in path])
                         for path in paths]
            expected_sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name),
                seq_paths,
                graph.SequenceMetadata(meta))

        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        po_input = msa.Po(pathtools.get_file_content_stringio(po_path),
                          po_path)
        nodes, sequences = po2poagraph.get_poagraph(po_input,
                                                    self.metadatacsv)
        actual_poagraph = graph.Poagraph(nodes, sequences)
        self.assertEqual(expected_poagraph, actual_poagraph)
    def test_2_consensuses_and_empty_sequences(self):
        """PO text generated from the poagraph must equal ``test_2.po``."""
        expected_po_content_path = self.po_files_dir.joinpath("test_2.po")

        # (node index, base letter, aligned node index or None)
        node_rows = [
            (0, 'C', 1),
            (1, 'T', 0),
            (2, 'A', 3),
            (3, 'G', 2),
            (4, 'C', None),
            (5, 'T', None),
            (6, 'A', 7),
            (7, 'T', 6),
            (8, 'G', None),
        ]
        poagraph_nodes = [
            graph.Node(node_id=nid(index),
                       base=bid(letter),
                       aligned_to=None if partner is None else nid(partner))
            for index, letter, partner in node_rows
        ]

        # (sequence name, node indices of each path, has group metadata?)
        sequence_rows = [
            ('seq0', [[0, 3, 4, 5, 6, 8]], True),
            ('seq1', [[1, 2, 4, 5, 7, 8]], True),
            ('seq2', [], True),
            ('seq3', [], True),
            # the consensus entries carry None instead of a metadata object
            ('CONSENS0', [[0, 3, 4, 5, 7, 8]], False),
            ('CONSENS1', [[1, 2, 4, 5, 6, 8]], False),
        ]
        poagraph_sequences = {}
        for name, paths, grouped in sequence_rows:
            if grouped:
                seq_metadata = graph.SequenceMetadata({'group': '1'})
            else:
                seq_metadata = None
            seq_paths = [graph.SeqPath([nid(i) for i in path])
                         for path in paths]
            poagraph_sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name), seq_paths, seq_metadata)

        poagraph = graph.Poagraph(poagraph_nodes, poagraph_sequences)

        actual_po_content = po.poagraph_to_PangenomePO(poagraph)
        expected_po_content = pathtools.get_file_content(
            expected_po_content_path)
        self.assertEqual(expected_po_content, actual_po_content)
    def test_1_missing_sequence_start(self):
        """Poagraph built with a constant missing-base provider must match.

        The first three expected nodes carry the value of ``self.missing_n``
        as their base.
        """
        maf_path = self.maf_files_dir.joinpath(
            "test_1_missing_sequence_start.maf")
        unknown = self.missing_n.value

        def make_node(index, letter, partner=None, block=None):
            # ``block_id`` is passed only for rows that specify a block, so
            # the remaining nodes are built without that keyword at all.
            extra = {} if block is None else {'block_id': bid(block)}
            return graph.Node(
                node_id=nid(index),
                base=graph.Base(letter),
                aligned_to=None if partner is None else nid(partner),
                **extra)

        expected_nodes = [
            make_node(0, unknown, block=0),
            make_node(1, unknown, block=0),
            make_node(2, unknown, block=0),
            make_node(3, 'A', partner=4),
            make_node(4, 'G', partner=3),
            make_node(5, 'G', block=0),
            make_node(6, 'G', partner=7),
            make_node(7, 'T', partner=6),
            make_node(8, 'C', block=0),
            make_node(9, 'A', block=0),
            make_node(10, 'G', block=0),
            make_node(11, 'T', block=0),
        ]

        # (sequence name, node indices of each path, metadata group)
        sequence_rows = [
            ('seq0', [], '1'),
            ('seq1', [[0, 1, 2, 3, 5, 6, 11]], '1'),
            ('seq2', [[4, 5, 7, 8, 9, 10, 11]], '2'),
            ('seq3', [], '2'),
        ]
        expected_sequences = {}
        for name, paths, group in sequence_rows:
            seq_paths = [graph.SeqPath([nid(i) for i in path])
                         for path in paths]
            expected_sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name),
                seq_paths,
                graph.SequenceMetadata({'group': group}))

        expected_poagraph = graph.Poagraph(expected_nodes, expected_sequences)

        maf = msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)
        base_provider = missings.ConstBaseProvider(self.missing_n)
        # Only the first element of the returned pair is checked here.
        actual_poagraph, _ = builder.build_from_dagmaf(maf,
                                                       base_provider,
                                                       self.metadatacsv)

        self.assertEqual(expected_poagraph, actual_poagraph)
    def test_1_messy_sequences(self):
        """Nodes and sequences read from the MAF fixture must match."""
        maf_path = self.maf_files_dir.joinpath("test_1_messy_sequences.maf")

        # (node index, base letter, aligned node index or None, block index)
        node_rows = [
            (0, 'A', None, 0),
            (1, 'A', 2, 0),
            (2, 'C', 1, 0),
            (3, 'T', None, 0),
            (4, 'C', 5, 0),
            (5, 'G', 4, 0),
            (6, 'A', None, 1),
            (7, 'C', None, 1),
            (8, 'G', None, 1),
            (9, 'C', 10, 2),
            (10, 'G', 9, 2),
            (11, 'T', None, 2),
            (12, 'C', None, 2),
            (13, 'C', None, 2),
            (14, 'A', None, 2),
        ]
        expected_nodes = [
            graph.Node(node_id=nid(index),
                       base=graph.Base(letter),
                       aligned_to=None if partner is None else nid(partner),
                       block_id=bid(block))
            for index, letter, partner, block in node_rows
        ]

        # (sequence name, node indices of each path, metadata group)
        sequence_rows = [
            ('seq0', [[1, 3, 4, 6, 8, 9, 11, 12]], '1'),
            ('seq1', [[2, 3, 4, 10, 11, 12, 13, 14]], '1'),
            ('seq2', [[0, 2, 5, 6, 7, 10, 11, 12, 14]], '2'),
            ('seq3', [], '2'),
        ]
        expected_sequences = {}
        for name, paths, group in sequence_rows:
            seq_paths = [graph.SeqPath([nid(i) for i in path])
                         for path in paths]
            expected_sequences[msa.SequenceID(name)] = graph.Sequence(
                msa.SequenceID(name),
                seq_paths,
                graph.SequenceMetadata({'group': group}))

        maf = msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)
        actual_nodes, actual_sequences = maf2poagraph.get_poagraph(
            maf, self.metadatacsv)

        # Nodes and sequences are compared separately, not as one Poagraph.
        self.assertEqual(expected_nodes, actual_nodes)
        self.assertEqual(expected_sequences, actual_sequences)