def test_random_suffix_links(self):
    """Run the suffix-link comparison on randomly generated words.

    Performs 100 rounds; each round requests a word from
    ``rand.random_word(200, ['a', 'b'])``, extracts suffix links from the
    McCreight construction as the reference, and passes everything to
    ``check_suffix_links``.
    """
    rounds, n, alphabet = 100, 200, ['a', 'b']
    for _ in range(rounds):
        t = rand.random_word(n, alphabet)
        # Whatever mccreight returns is unpacked straight into the
        # link extractor, exactly as the checker itself does.
        built = suffix_tree.mccreight(t, n)
        expected_links = TestSuffixTrees.get_suffix_links(*built)
        self.check_suffix_links(t, n, expected_links)
def check_suffix_array(self, t, n, reference):
    """Assert that every suffix-array construction returns ``reference``.

    Args:
        t: text passed unchanged to each algorithm.
        n: length argument passed unchanged to each algorithm.
        reference: expected suffix array.

    Each callable in ``SUFFIX_ARRAY_ALGORITHMS`` is checked first, with its
    ``__name__`` in the failure message.  Finally the array derived from
    McCreight's suffix tree is checked.
    """
    for algorithm in SUFFIX_ARRAY_ALGORITHMS:
        self.assertEqual(
            algorithm(t, n), reference,
            'Algorithm: {0}'.format(algorithm.__name__))
    # The conversion goes tree -> array, so the message names that
    # direction; a failure here points at from_suffix_tree.
    self.assertEqual(
        suffix_array.from_suffix_tree(suffix_tree.mccreight(t, n)[0], n),
        reference, 'Suffix array from suffix tree')
def test_random_suffix_tree(self, _, algorithm, links):
    """Check one suffix-tree algorithm against references on random words.

    Args:
        _: unused (presumably the case label supplied by the test
            parameterisation -- confirm against the decorator).
        algorithm: forwarded to ``check_suffix_trees``.
        links: forwarded to ``check_suffix_trees``.

    For each of 100 words from ``rand.random_word(200, ['a', 'b'])`` the
    reference is a pair: the naive suffix tree, and element ``[1]`` of
    ``get_suffix_links`` applied to the McCreight construction.
    """
    rounds, n, alphabet = 100, 200, ['a', 'b']
    for _round in range(rounds):
        t = rand.random_word(n, alphabet)
        expected_tree = suffix_tree.naive(t, n)
        expected_links = get_suffix_links(*suffix_tree.mccreight(t, n))[1]
        reference = (expected_tree, expected_links)
        self.check_suffix_trees(t, n, reference, algorithm, links)
def test_all_suffix_links(self):
    """Run the suffix-link comparison on every word over {a, b}.

    Enumerates all words of lengths 2 through 10, prefixes each with
    ``'#'``, extracts the reference links from the McCreight construction
    and hands them to ``check_suffix_links``.
    """
    max_length, alphabet = 10, ['a', 'b']
    for n in range(2, max_length + 1):
        for letters in itertools.product(alphabet, repeat=n):
            # product yields tuples of characters; glue them together
            # behind the leading '#'.
            t = '#' + ''.join(letters)
            built = suffix_tree.mccreight(t, n)
            expected_links = TestSuffixTrees.get_suffix_links(*built)
            self.check_suffix_links(t, n, expected_links)
def test_all_suffix_tree(self, _, algorithm, links):
    """Check one suffix-tree algorithm on every word over {a, b}.

    Args:
        _: unused (presumably the case label supplied by the test
            parameterisation -- confirm against the decorator).
        algorithm: forwarded to ``check_suffix_trees``.
        links: forwarded to ``check_suffix_trees``.

    All words of lengths 2 through 10 are enumerated and prefixed with
    ``'#'``.  The reference is a pair: the naive suffix tree, and element
    ``[1]`` of ``get_suffix_links`` applied to the McCreight construction.
    """
    max_length, alphabet = 10, ['a', 'b']
    for n in range(2, max_length + 1):
        for letters in itertools.product(alphabet, repeat=n):
            t = '#' + ''.join(letters)
            expected_tree = suffix_tree.naive(t, n)
            expected_links = get_suffix_links(
                *suffix_tree.mccreight(t, n))[1]
            reference = (expected_tree, expected_links)
            self.check_suffix_trees(t, n, reference, algorithm, links)
def check_suffix_links(self, t, n, reference):
    """Assert that all three constructions give the ``reference`` links.

    McCreight and Ukkonen results go through
    ``TestSuffixTrees.get_suffix_links``; the Weiner result goes through
    ``TestSuffixTrees.get_backward_suffix_links``.  The checks run in that
    order and the first mismatch aborts the rest.
    """
    mccreight_links = TestSuffixTrees.get_suffix_links(
        *suffix_tree.mccreight(t, n))
    self.assertEqual(mccreight_links, reference, 'Algorithm: mccreight')

    ukkonen_links = TestSuffixTrees.get_suffix_links(
        *suffix_tree.ukkonen(t, n))
    self.assertEqual(ukkonen_links, reference, 'Algorithm: ukkonen')

    weiner_links = TestSuffixTrees.get_backward_suffix_links(
        *suffix_tree.weiner(t, n))
    self.assertEqual(weiner_links, reference, 'Algorithm: weiner')
def check_lcp_array(self, t, n, reference):
    """Assert that three LCP-array computations all equal ``reference``.

    The computations, in order: ``lcp_from_suffix_array`` on the
    prefix-doubling suffix array, ``lcp_from_suffix_tree`` on the first
    element of the McCreight result, and ``lcp_kasai`` on a freshly built
    prefix-doubling suffix array.
    """
    via_array = suffix_array.lcp_from_suffix_array(
        suffix_array.prefix_doubling(t, n), t, n)
    self.assertEqual(via_array, reference, 'LCP array from suffix array')

    via_tree = suffix_array.lcp_from_suffix_tree(
        suffix_tree.mccreight(t, n)[0])
    self.assertEqual(via_tree, reference, 'LCP array from suffix tree')

    # A new suffix array is built here on purpose rather than reusing the
    # one above, so each check stays independent of the others.
    via_kasai = suffix_array.lcp_kasai(
        suffix_array.prefix_doubling(t, n), t, n)
    self.assertEqual(via_kasai, reference, 'Algorithm: kasai')
def fast_on_average(text, word, n, m):
    """Yield match positions of ``word`` in ``text`` using a factor filter.

    Args:
        text: text to search; ``text[0]`` is copied to the front of every
            fragment handed to Knuth-Morris-Pratt (presumably a sentinel
            with real characters starting at index 1 -- confirm against
            callers).
        word: pattern; passed to ``suffix_tree.mccreight`` together with m.
        n: length of the text.
        m: length of the pattern; ``m <= 0`` raises ``ValueError`` from the
            logarithm.

    Yields:
        Each value produced by ``forward.knuth_morris_pratt`` on a
        fragment, shifted by ``i - m`` to undo the fragment offset.

    The text is probed at positions ``i = m, m + (m - r), ...``.  A window
    of ``r + 1`` characters ending at ``i`` is looked up in the suffix tree
    of ``word``; only when ``find_node`` returns something is the
    surrounding fragment scanned with Knuth-Morris-Pratt.
    """
    ST, _ = suffix_tree.mccreight(word, m)
    # log2 is exact for powers of two, whereas log(m, 2) can land slightly
    # above the integer and make ceil overshoot, inflating the window.
    # The cap at m - 1 keeps the step m - r at least 1.
    r = min(2 * math.ceil(math.log2(m)), m - 1)
    step = m - r
    i = m
    while i <= n:
        if ST.find_node(text[(i - r):(i + 1)], r + 1) is not None:
            # Fragment spans text[i-m+1 : i+m-r], with text[0] prepended.
            subtext = text[0] + text[(i - m + 1):(i + m - r)]
            # The slice may be clipped by the end of the text.
            subn = min(i + m - r, n + 1) - (i - m + 1)
            shift = i - m
            for v in forward.knuth_morris_pratt(subtext, word, subn, m):
                yield v + shift
        i += step
['Boyer-Moore-Galil', backward.boyer_moore_galil], ['Turbo-Boyer-Moore', backward.turbo_boyer_moore], ['bad shift heuristic', backward.bad_shift_heuristic], ['quick search heuristic', backward.quick_search], [ 'Boyer-Moore-Apostolico-Giancarlo', backward.boyer_moore_apostolico_giancarlo ], ['Horspool', backward.horspool], ['Karp-Rabin', other.karp_rabin], ['fast-on-average', other.fast_on_average], ['two-way constant space', other.two_way], [ 'suffix tree', lambda t, w, n, m: suffix_tree.contains( suffix_tree.mccreight(t, n)[0], t, w, n, m), ], [ 'suffix array', lambda t, w, n, m: suffix_array.contains( suffix_array.prefix_doubling(t, n), t, w, n, m), ], ['lcp-lr array', lcp_lr_contains], ] class TestExactStringMatching(unittest.TestCase): run_large = unittest.skipUnless(os.environ.get('LARGE', False), 'Skip test in small runs') def check_first_exact_match(self, t, w, n, m, reference, algorithm):
from generator import rand
from string_indexing import farach, lcp, suffix_array, suffix_tree

# Each entry is [label, callable(t, n)]; the callable returns the LCP array
# that the tests compare against a reference.
LCP_ARRAY_ALGORITHMS = [
    ['Farach', farach.lcp_array],
    [
        'Kasai',
        lambda t, n: lcp.kasai(suffix_array.prefix_doubling(t, n), t, n),
    ],
    [
        'from suffix tree',
        lambda t, n: lcp.from_suffix_tree(suffix_tree.mccreight(t, n)[0]),
    ],
    [
        'from suffix array',
        lambda t, n: lcp.from_suffix_array(
            suffix_array.prefix_doubling(t, n), t, n),
    ],
]


class TestLcpArrays(unittest.TestCase):
    """Tests comparing LCP-array constructions against a reference."""

    # Decorator that skips a test unless the LARGE environment variable
    # is set to a non-empty value.
    run_large = unittest.skipUnless(
        os.environ.get('LARGE', False), 'Skip test in small runs')

    def check_lcp_array(self, t, n, reference, algorithm):
        """Assert that ``algorithm(t, n)`` equals ``reference``."""
        computed = algorithm(t, n)
        self.assertEqual(computed, reference)
import parameterized
from generator import rand
from string_indexing import farach, suffix_array, suffix_tree

# Each entry is [label, callable(t, n)]; the list is fed to
# parameterized.expand below.
SUFFIX_ARRAY_ALGORITHMS = [
    ['Karp-Miller-Rosenberg', suffix_array.prefix_doubling],
    ['Karkkainen-Sanders', suffix_array.skew],
    ['Farach', farach.suffix_array],
    ['Larsson-Sadakane', suffix_array.larsson_sadakane],
    ['Ko-Aluru', suffix_array.small_large],
    ['Zhang-Nong-Chan', suffix_array.induced_sorting],
    [
        'from suffix tree',
        lambda t, n: suffix_array.from_suffix_tree(
            suffix_tree.mccreight(t, n)[0], n),
    ],
]


class TestSuffixArrays(unittest.TestCase):
    """Tests comparing suffix-array constructions against a reference."""

    # Decorator that skips a test unless the LARGE environment variable
    # is set to a non-empty value.
    run_large = unittest.skipUnless(
        os.environ.get('LARGE', False), 'Skip test in small runs')

    def check_suffix_array(self, t, n, reference, algorithm):
        """Assert that ``algorithm(t, n)`` equals ``reference``.

        The failure message carries the callable's ``__name__`` and the
        text.
        """
        failure_note = 'Algorithm {}, text {}'.format(algorithm.__name__, t)
        self.assertEqual(algorithm(t, n), reference, failure_note)

    @parameterized.parameterized.expand(SUFFIX_ARRAY_ALGORITHMS)
    def test_examples(self, _, algorithm):
        """Check two hand-written examples for one algorithm.

        The first positional argument is the entry's label and is unused.
        """
        examples = (
            ('#abaaba', 6, [7, 6, 3, 4, 1, 5, 2]),
            ('#banana', 6, [7, 6, 4, 2, 1, 5, 3]),
        )
        for text, length, expected in examples:
            self.check_suffix_array(text, length, expected, algorithm)