Esempio n. 1
0
    def test_edges_for_ataaatg(self):
        """Check the edge labels of the tree built from the word 'ATAAATG'.

        The tree is expected to store the word with a trailing '$'. The
        edge comparison is delegated to ``assert_edge_strings``, which is
        defined outside this snippet.
        """
        tree = generalized_suffix_tree.GeneralizedSuffixTree(['ATAAATG'])

        # The stored word carries the terminator.
        self.assertEqual(['ATAAATG$'], tree._words)

        edge_labels = [
            'A', 'T', 'G$', '$',
            'A', 'T', 'ATG$', 'TG$',
            'AAATG$', 'G$', 'AAATG$', 'G$',
        ]
        self.assert_edge_strings(edge_labels, tree)
Esempio n. 2
0
    def test_node_depth_in_tree_with_one_word(self):
        """Check ``node_substring`` and ``node_depth`` for every node of 'word'.

        The tree built from the single word 'word' is expected to hold six
        nodes; the table below lists the expected substring and depth for
        node ids 0 through 5, in that order.
        """
        tree = generalized_suffix_tree.GeneralizedSuffixTree(['word'])

        self.assertEqual(['word$'], tree._words)
        self.assertEqual(6, len(tree._nodes))

        # Position in the list is the node id: (substring, depth).
        per_node = [
            ('', 0),
            ('word$', 4),
            ('ord$', 3),
            ('rd$', 2),
            ('d$', 1),
            ('$', 0),
        ]
        for node_id, (substring, depth) in enumerate(per_node):
            self.assertEqual(substring, tree.node_substring(node_id))
            self.assertEqual(depth, tree.node_depth(node_id))
Esempio n. 3
0
    def test_node_substring_in_tree_with_two_words(self):
        """Check ``node_substring`` for every node of the 'panama'/'banana' tree.

        The stored words are expected to end in '$0' and '$1' respectively,
        and the tree is expected to hold eighteen nodes.
        """
        tree = generalized_suffix_tree.GeneralizedSuffixTree(
            ['panama', 'banana'])

        self.assertEqual(['panama$0', 'banana$1'], tree._words)
        self.assertEqual(18, len(tree._nodes))

        # Position in the list is the node id.
        substrings = [
            '',           # 0
            'panama$0',   # 1
            'anama$0',    # 2
            'nama$0',     # 3
            'a',          # 4
            'ama$0',      # 5
            'ma$0',       # 6
            'a$0',        # 7
            '$0',         # 8
            'banana$1',   # 9
            'ana',        # 10
            'anana$1',    # 11
            'na',         # 12
            'nana$1',     # 13
            'ana$1',      # 14
            'na$1',       # 15
            'a$1',        # 16
            '$1',         # 17
        ]
        for node_id, substring in enumerate(substrings):
            self.assertEqual(substring, tree.node_substring(node_id))
Esempio n. 4
0
    def test_edge_substring_in_tree_with_two_words(self):
        """Check ``edge_substring`` for every edge of the 'panama'/'banana' tree.

        Edges are looked up in ``tree._edges`` by a ``(parent, child)`` pair
        of node ids; seventeen edges are expected in total.
        """
        tree = generalized_suffix_tree.GeneralizedSuffixTree(
            ['panama', 'banana'])

        self.assertEqual(['panama$0', 'banana$1'], tree._words)
        self.assertEqual(17, len(tree._edges))

        # ((parent, child), expected label), checked in this order.
        labelled_edges = [
            ((0, 1), 'panama$0'),
            ((0, 4), 'a'),
            ((0, 6), 'ma$0'),
            ((0, 8), '$0'),
            ((0, 9), 'banana$1'),
            ((0, 12), 'na'),
            ((0, 17), '$1'),
            ((4, 5), 'ma$0'),
            ((4, 7), '$0'),
            ((4, 10), 'na'),
            ((4, 16), '$1'),
            ((12, 3), 'ma$0'),
            ((12, 13), 'na$1'),
            ((12, 15), '$1'),
            ((10, 2), 'ma$0'),
            ((10, 11), 'na$1'),
            ((10, 14), '$1'),
        ]
        for key, label in labelled_edges:
            self.assertEqual(label, tree.edge_substring(tree._edges[key]))
Esempio n. 5
0
    def test_edges_for_aca(self):
        """Check the edge labels of the tree built from the word 'ACA'.

        The tree is expected to store the word with a trailing '$'. The
        edge comparison is delegated to ``assert_edge_strings``, which is
        defined outside this snippet.
        """
        tree = generalized_suffix_tree.GeneralizedSuffixTree(['ACA'])

        # The stored word carries the terminator.
        self.assertEqual(['ACA$'], tree._words)

        edge_labels = ['CA$', 'A', '$', 'CA$', '$']
        self.assert_edge_strings(edge_labels, tree)
Esempio n. 6
0
    def test_edge_substring_in_tree_with_one_word(self):
        """Check ``edge_substring`` for every edge of the tree built from 'word'.

        Five edges are expected, each keyed ``(0, child)`` in ``tree._edges``
        with child ids 1 through 5.
        """
        tree = generalized_suffix_tree.GeneralizedSuffixTree(['word'])

        self.assertEqual(['word$'], tree._words)
        self.assertEqual(5, len(tree._edges))

        # Expected label of edge (0, child) for child = 1, 2, ..., 5.
        labels = ['word$', 'ord$', 'rd$', 'd$', '$']
        for child, label in enumerate(labels, 1):
            self.assertEqual(
                label, tree.edge_substring(tree._edges[(0, child)]))
Esempio n. 7
0
    def test_constructor_with_two_words(self):
        """Check the full node/edge layout of the 'panama'/'banana' tree.

        For every node the expected ``parent``, ``children`` and ``words``
        attributes are asserted; for every edge the expected
        ``_word_index``, ``_start_index`` and ``_stop_index`` are asserted
        together with the slice of the stored word they select. The final
        collection comparisons are delegated to ``assert_items``, which is
        defined outside this snippet.
        """
        tree = generalized_suffix_tree.GeneralizedSuffixTree(
            ['panama', 'banana'])

        self.assertEqual(['panama$0', 'banana$1'], tree._words)

        self.assertEqual(18, len(tree._nodes))
        self.assertEqual(17, len(tree._edges))
        self.assertEqual(len(tree._nodes) - 1, len(tree._edges))

        # (node id, parent, children, word indices), checked in this order.
        # The root (node 0) is expected to report -1 as its parent.
        node_table = [
            (0, -1, [1, 4, 6, 8, 9, 12, 17], {0, 1}),
            (1, 0, [], {0}),
            (4, 0, [5, 7, 10, 16], {0, 1}),
            (6, 0, [], {0}),
            (8, 0, [], {0}),
            (9, 0, [], {1}),
            (12, 0, [3, 13, 15], {0, 1}),
            (17, 0, [], {1}),
            (5, 4, [], {0}),
            (7, 4, [], {0}),
            (10, 4, [2, 11, 14], {0, 1}),
            (16, 4, [], {1}),
            (3, 12, [], {0}),
            (13, 12, [], {1}),
            (15, 12, [], {1}),
            (2, 10, [], {0}),
            (11, 10, [], {1}),
            (14, 10, [], {1}),
        ]
        visited_nodes = []
        for node_id, parent, children, word_ids in node_table:
            node = tree._nodes[node_id]
            self.assertEqual(parent, node.parent)
            self.assertEqual(children, node.children)
            self.assertEqual(word_ids, node.words)
            visited_nodes.append(node_id)

        self.assert_items(visited_nodes, list(range(len(tree._nodes))))

        # ((parent, child), word index, start, stop, selected substring).
        edge_table = [
            ((0, 1), 0, 0, 8, 'panama$0'),
            ((0, 4), 0, 1, 2, 'a'),
            ((0, 6), 0, 4, 8, 'ma$0'),
            ((0, 8), 0, 6, 8, '$0'),
            ((0, 9), 1, 0, 8, 'banana$1'),
            ((0, 12), 0, 2, 4, 'na'),
            ((0, 17), 1, 6, 8, '$1'),
            ((4, 5), 0, 4, 8, 'ma$0'),
            ((4, 7), 0, 6, 8, '$0'),
            ((4, 10), 0, 2, 4, 'na'),
            ((4, 16), 1, 6, 8, '$1'),
            ((12, 3), 0, 4, 8, 'ma$0'),
            ((12, 13), 1, 4, 8, 'na$1'),
            ((12, 15), 1, 6, 8, '$1'),
            ((10, 2), 0, 4, 8, 'ma$0'),
            ((10, 11), 1, 4, 8, 'na$1'),
            ((10, 14), 1, 6, 8, '$1'),
        ]
        visited_edges = []
        for key, word_index, start, stop, label in edge_table:
            edge = tree._edges[key]
            self.assertEqual(word_index, edge._word_index)
            self.assertEqual(start, edge._start_index)
            self.assertEqual(stop, edge._stop_index)
            # Slice the stored word with the edge's own indices.
            source_word = tree._words[edge._word_index]
            self.assertEqual(
                label, source_word[edge._start_index:edge._stop_index])
            visited_edges.append(edge)

        self.assert_items(visited_edges, tree._edges.values())
Esempio n. 8
0
    def test_constructor_with_one_word(self):
        """Check the full node/edge layout of the tree built from 'word'.

        For every node the expected ``parent``, ``children`` and ``words``
        attributes are asserted; for every edge the expected
        ``_word_index``, ``_start_index`` and ``_stop_index`` are asserted
        together with the slice of the stored word they select. The final
        collection comparisons are delegated to ``assert_items``, which is
        defined outside this snippet.
        """
        tree = generalized_suffix_tree.GeneralizedSuffixTree(['word'])

        self.assertEqual(['word$'], tree._words)

        self.assertEqual(6, len(tree._nodes))
        self.assertEqual(5, len(tree._edges))
        self.assertEqual(len(tree._nodes) - 1, len(tree._edges))

        # (node id, parent, children, word indices), checked in this order.
        # The root (node 0) is expected to report -1 as its parent.
        node_table = [
            (0, -1, [1, 2, 3, 4, 5], {0}),
            (1, 0, [], {0}),
            (2, 0, [], {0}),
            (3, 0, [], {0}),
            (4, 0, [], {0}),
            (5, 0, [], {0}),
        ]
        visited_nodes = []
        for node_id, parent, children, word_ids in node_table:
            node = tree._nodes[node_id]
            self.assertEqual(parent, node.parent)
            self.assertEqual(children, node.children)
            self.assertEqual(word_ids, node.words)
            visited_nodes.append(node_id)

        self.assert_items(visited_nodes, list(range(len(tree._nodes))))

        # ((parent, child), word index, start, stop, selected substring).
        edge_table = [
            ((0, 1), 0, 0, 5, 'word$'),
            ((0, 2), 0, 1, 5, 'ord$'),
            ((0, 3), 0, 2, 5, 'rd$'),
            ((0, 4), 0, 3, 5, 'd$'),
            ((0, 5), 0, 4, 5, '$'),
        ]
        visited_edges = []
        for key, word_index, start, stop, label in edge_table:
            edge = tree._edges[key]
            self.assertEqual(word_index, edge._word_index)
            self.assertEqual(start, edge._start_index)
            self.assertEqual(stop, edge._stop_index)
            # Slice the stored word with the edge's own indices.
            source_word = tree._words[edge._word_index]
            self.assertEqual(
                label, source_word[edge._start_index:edge._stop_index])
            visited_edges.append(edge)

        self.assert_items(visited_edges, tree._edges.values())