Ejemplo n.º 1
0
 def test_random_suffix_links(self):
   # Runs 100 rounds. Each round asks rand.random_word for a word built from
   # length 200 and alphabet ['a', 'b'], derives the reference suffix links
   # from the McCreight construction, and hands them to check_suffix_links.
   rounds = 100
   length = 200
   alphabet = ['a', 'b']
   for _ in range(rounds):
     word = rand.random_word(length, alphabet)
     expected = TestSuffixTrees.get_suffix_links(
         *suffix_tree.mccreight(word, length))
     self.check_suffix_links(word, length, expected)
Ejemplo n.º 2
0
 def check_suffix_array(self, t, n, reference):
     """Assert that every suffix array construction returns ``reference``.

     Each callable in ``SUFFIX_ARRAY_ALGORITHMS`` is invoked as
     ``algorithm(t, n)``. Afterwards the result of
     ``suffix_array.from_suffix_tree`` applied to the first element returned
     by ``suffix_tree.mccreight(t, n)`` is checked against the same
     reference.

     Args:
         t: Text passed unchanged to every algorithm.
         n: Length argument passed unchanged to every algorithm.
         reference: Expected value compared with ``assertEqual``.
     """
     for algorithm in SUFFIX_ARRAY_ALGORITHMS:
         self.assertEqual(algorithm(t, n), reference,
                          'Algorithm: {0}'.format(algorithm.__name__))
     # The value under test is a suffix array derived from a suffix tree, so
     # the failure message must say exactly that (it used to be reversed).
     self.assertEqual(
         suffix_array.from_suffix_tree(suffix_tree.mccreight(t, n)[0], n),
         reference, 'Suffix array from suffix tree')
Ejemplo n.º 3
0
 def test_random_suffix_tree(self, _, algorithm, links):
     # Runs 100 rounds on words obtained from rand.random_word(200, ['a', 'b']).
     # The reference is a pair: the tree from suffix_tree.naive and element
     # [1] of get_suffix_links applied to the McCreight construction.
     rounds, length, alphabet = 100, 200, ['a', 'b']
     for _trial in range(rounds):
         word = rand.random_word(length, alphabet)
         expected = (
             suffix_tree.naive(word, length),
             get_suffix_links(*suffix_tree.mccreight(word, length))[1],
         )
         self.check_suffix_trees(word, length, expected, algorithm, links)
Ejemplo n.º 4
0
 def test_all_suffix_links(self):
   # Enumerates every word over ['a', 'b'] of length 2..10, prefixes it with
   # '#', and checks the suffix links against those derived from the
   # McCreight construction.
   max_length = 10
   alphabet = ['a', 'b']
   for n in range(2, max_length + 1):
     for letters in itertools.product(alphabet, repeat=n):
       text = '#' + ''.join(letters)
       expected = TestSuffixTrees.get_suffix_links(
           *suffix_tree.mccreight(text, n))
       self.check_suffix_links(text, n, expected)
Ejemplo n.º 5
0
 def test_all_suffix_tree(self, _, algorithm, links):
     # Enumerates every word over ['a', 'b'] of length 2..10, prefixes it with
     # '#', and checks `algorithm` against a reference pair: the tree from
     # suffix_tree.naive and element [1] of get_suffix_links applied to the
     # McCreight construction.
     max_length = 10
     alphabet = ['a', 'b']
     for n in range(2, max_length + 1):
         for letters in itertools.product(alphabet, repeat=n):
             text = '#' + ''.join(letters)
             expected = (
                 suffix_tree.naive(text, n),
                 get_suffix_links(*suffix_tree.mccreight(text, n))[1],
             )
             self.check_suffix_trees(text, n, expected, algorithm, links)
Ejemplo n.º 6
0
 def check_suffix_links(self, t, n, reference):
     """Assert that three suffix tree constructions agree with ``reference``.

     The McCreight and Ukkonen results go through
     ``TestSuffixTrees.get_suffix_links``; the Weiner result goes through
     ``TestSuffixTrees.get_backward_suffix_links``. Each extracted value is
     compared with ``reference`` in that order.

     Args:
         t: Text passed unchanged to every construction.
         n: Length argument passed unchanged to every construction.
         reference: Expected value compared with ``assertEqual``.
     """
     mccreight_links = TestSuffixTrees.get_suffix_links(
         *suffix_tree.mccreight(t, n))
     self.assertEqual(mccreight_links, reference, 'Algorithm: mccreight')

     ukkonen_links = TestSuffixTrees.get_suffix_links(
         *suffix_tree.ukkonen(t, n))
     self.assertEqual(ukkonen_links, reference, 'Algorithm: ukkonen')

     weiner_links = TestSuffixTrees.get_backward_suffix_links(
         *suffix_tree.weiner(t, n))
     self.assertEqual(weiner_links, reference, 'Algorithm: weiner')
Ejemplo n.º 7
0
 def check_lcp_array(self, t, n, reference):
     """Assert that three LCP array computations return ``reference``.

     Checked in order: ``suffix_array.lcp_from_suffix_array`` on the
     prefix-doubling suffix array, ``suffix_array.lcp_from_suffix_tree`` on
     the first element returned by ``suffix_tree.mccreight``, and
     ``suffix_array.lcp_kasai`` on the prefix-doubling suffix array.

     Args:
         t: Text passed unchanged to the algorithms.
         n: Length argument passed unchanged to the algorithms.
         reference: Expected value compared with ``assertEqual``.
     """
     via_suffix_array = suffix_array.lcp_from_suffix_array(
         suffix_array.prefix_doubling(t, n), t, n)
     self.assertEqual(via_suffix_array, reference,
                      'LCP array from suffix array')

     via_suffix_tree = suffix_array.lcp_from_suffix_tree(
         suffix_tree.mccreight(t, n)[0])
     self.assertEqual(via_suffix_tree, reference,
                      'LCP array from suffix tree')

     via_kasai = suffix_array.lcp_kasai(
         suffix_array.prefix_doubling(t, n), t, n)
     self.assertEqual(via_kasai, reference, 'Algorithm: kasai')
Ejemplo n.º 8
0
def fast_on_average(text, word, n, m):
    """Yield positions derived from matches of ``word`` inside ``text``.

    A suffix tree of ``word`` is built with ``suffix_tree.mccreight``. The
    scan then walks ``text`` in steps of ``m - r``, where
    ``r = min(2 * ceil(log2(m)), m - 1)``. At each stop the ``r + 1``
    characters ending at the current index are looked up with
    ``find_node``. Only when that lookup is not ``None`` is Knuth-Morris-Pratt
    run on the surrounding fragment, and every value ``v`` it produces is
    yielded as ``v + i - m``.

    NOTE(review): ``text[0]`` is prepended to each fragment, so the inputs
    presumably carry a sentinel at index 0 with the real text starting at
    index 1 -- confirm against the callers.

    Args:
        text: Text to search in.
        word: Pattern to search for.
        n: Upper bound (inclusive) for the scan index.
        m: Length argument for ``word``; ``math.log`` requires it to be
            positive.

    Yields:
        ``v + i - m`` for each ``v`` reported by
        ``forward.knuth_morris_pratt`` on an inspected fragment.
    """
    tree, _ = suffix_tree.mccreight(word, m)
    back = min(2 * math.ceil(math.log(m, 2)), m - 1)
    step = m - back
    pos = m
    while pos <= n:
        probe = text[(pos - back):(pos + 1)]
        if tree.find_node(probe, back + 1) is not None:
            left = pos - m + 1
            right = pos + step
            fragment = text[0] + text[left:right]
            # The fragment may be cut short by the end of the text.
            fragment_length = min(right, n + 1) - left
            # Collect the whole batch before yielding, as the original does.
            found = [
                v + pos - m
                for v in forward.knuth_morris_pratt(
                    fragment, word, fragment_length, m)
            ]
            yield from found
        pos = pos + step
    ['Boyer-Moore-Galil', backward.boyer_moore_galil],
    ['Turbo-Boyer-Moore', backward.turbo_boyer_moore],
    ['bad shift heuristic', backward.bad_shift_heuristic],
    ['quick search heuristic', backward.quick_search],
    [
        'Boyer-Moore-Apostolico-Giancarlo',
        backward.boyer_moore_apostolico_giancarlo
    ],
    ['Horspool', backward.horspool],
    ['Karp-Rabin', other.karp_rabin],
    ['fast-on-average', other.fast_on_average],
    ['two-way constant space', other.two_way],
    [
        'suffix tree',
        lambda t, w, n, m: suffix_tree.contains(
            suffix_tree.mccreight(t, n)[0], t, w, n, m),
    ],
    [
        'suffix array',
        lambda t, w, n, m: suffix_array.contains(
            suffix_array.prefix_doubling(t, n), t, w, n, m),
    ],
    ['lcp-lr array', lcp_lr_contains],
]


class TestExactStringMatching(unittest.TestCase):
    run_large = unittest.skipUnless(os.environ.get('LARGE', False),
                                    'Skip test in small runs')

    def check_first_exact_match(self, t, w, n, m, reference, algorithm):
Ejemplo n.º 10
0
from generator import rand
from string_indexing import farach, lcp, suffix_array, suffix_tree

# Table of [label, callable] rows. The lambdas take (t, n);
# farach.lcp_array is presumably called the same way -- confirm.
LCP_ARRAY_ALGORITHMS = [
    ['Farach', farach.lcp_array],
    [
        'Kasai',
        lambda t, n: lcp.kasai(suffix_array.prefix_doubling(t, n), t, n),
    ],
    [
        'from suffix tree',
        lambda t, n: lcp.from_suffix_tree(suffix_tree.mccreight(t, n)[0]),
    ],
    [
        'from suffix array',
        lambda t, n: lcp.from_suffix_array(
            suffix_array.prefix_doubling(t, n), t, n),
    ],
]


class TestLcpArrays(unittest.TestCase):
    """Test case holding shared helpers for LCP array checks."""

    # Skip decorator: the decorated test runs only when the LARGE environment
    # variable holds a truthy (non-empty) value.
    run_large = unittest.skipUnless(
        os.environ.get('LARGE', False), 'Skip test in small runs')

    def check_lcp_array(self, t, n, reference, algorithm):
        """Assert that ``algorithm(t, n)`` equals ``reference``.

        Args:
            t: First argument passed to ``algorithm``.
            n: Second argument passed to ``algorithm``.
            reference: Expected value compared with ``assertEqual``.
            algorithm: Callable invoked as ``algorithm(t, n)``.
        """
        computed = algorithm(t, n)
        self.assertEqual(computed, reference)
Ejemplo n.º 11
0
import parameterized

from generator import rand
from string_indexing import farach, suffix_array, suffix_tree

# Table of [label, callable] rows expanded by parameterized into one test per
# row. The lambda takes (t, n); the other callables are invoked the same way
# by check_suffix_array.
SUFFIX_ARRAY_ALGORITHMS = [
    ['Karp-Miller-Rosenberg', suffix_array.prefix_doubling],
    ['Karkkainen-Sanders', suffix_array.skew],
    ['Farach', farach.suffix_array],
    ['Larsson-Sadakane', suffix_array.larsson_sadakane],
    ['Ko-Aluru', suffix_array.small_large],
    ['Zhang-Nong-Chan', suffix_array.induced_sorting],
    [
        'from suffix tree',
        lambda t, n: suffix_array.from_suffix_tree(
            suffix_tree.mccreight(t, n)[0], n),
    ],
]


class TestSuffixArrays(unittest.TestCase):
    """Test case checking suffix array algorithms against known results."""

    # Skip decorator: the decorated test runs only when the LARGE environment
    # variable holds a truthy (non-empty) value.
    run_large = unittest.skipUnless(
        os.environ.get('LARGE', False), 'Skip test in small runs')

    def check_suffix_array(self, t, n, reference, algorithm):
        """Assert that ``algorithm(t, n)`` equals ``reference``.

        On failure the message names ``algorithm.__name__`` and the text.

        Args:
            t: First argument passed to ``algorithm``.
            n: Second argument passed to ``algorithm``.
            reference: Expected value compared with ``assertEqual``.
            algorithm: Callable invoked as ``algorithm(t, n)``; it must
                expose ``__name__``.
        """
        computed = algorithm(t, n)
        message = 'Algorithm {}, text {}'.format(algorithm.__name__, t)
        self.assertEqual(computed, reference, message)

    @parameterized.parameterized.expand(SUFFIX_ARRAY_ALGORITHMS)
    def test_examples(self, _, algorithm):
        # Two fixed texts with their expected results, checked in order.
        cases = (
            ('#abaaba', 6, [7, 6, 3, 4, 1, 5, 2]),
            ('#banana', 6, [7, 6, 4, 2, 1, 5, 3]),
        )
        for text, length, expected in cases:
            self.check_suffix_array(text, length, expected, algorithm)