def test_random_hamming_distance(self, _, algorithm):
     T, n, A = 100, 100, ['a', 'b']
     for _ in range(T):
         t_1, t_2 = rand.random_word(n, A), rand.random_word(n, A)
         reference = distance.hamming_distance(t_1, t_2, n, n)
         self.check_distance(t_1, t_2, n, n, reference,
                             distance.HAMMING_DISTANCE, algorithm)
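A minimal sketch of the Hamming-distance reference used above, assuming the words carry a sentinel at index 0 and that n_1, n_2 are the logical lengths (the helper name is hypothetical):
def reference_hamming_distance(t_1, t_2, n_1, n_2):
    # Hamming distance is defined only for words of equal length;
    # positions 1..n are compared, the sentinel at index 0 is skipped.
    assert n_1 == n_2
    return sum(t_1[i] != t_2[i] for i in range(1, n_1 + 1))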
 def test_random_string_matching_with_dont_care(self):
   T, n, m, A = 100, 500, 10, ['a', 'b']
   for _ in range(T):
     t, w = rand.random_word(n, A), rand.random_word(m, A)
     reference = [match.start() + 1 for match in regex.finditer(
          w[1:].replace('?', '.'), t[1:], overlapped=True)]
     self.check_get_all_matches_with_dont_care(t, w, n, m, reference)
 def test_random_edit_distance(self, _, algorithm):
     T, n_1, n_2, A = 100, 15, 15, ['a', 'b']
     for _ in range(T):
         t_1, t_2 = rand.random_word(n_1, A), rand.random_word(n_2, A)
         reference = distance.distance(t_1, t_2, n_1, n_2,
                                       distance.EDIT_DISTANCE)
         self.check_distance(t_1, t_2, n_1, n_2, reference,
                             distance.EDIT_DISTANCE, algorithm)
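distance.distance with EDIT_DISTANCE is the reference here; a textbook dynamic-programming sketch under the same sentinel convention (helper name is an assumption):
def reference_edit_distance(t_1, t_2, n_1, n_2):
    # previous[j] is the edit distance between t_1[1..i-1] and t_2[1..j].
    previous = list(range(n_2 + 1))
    for i in range(1, n_1 + 1):
        current = [i] + [0] * n_2
        for j in range(1, n_2 + 1):
            substitution = previous[j - 1] + int(t_1[i] != t_2[j])
            current[j] = min(previous[j] + 1, current[j - 1] + 1, substitution)
        previous = current
    return previous[n_2]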
Example #4
 def test_random_string_matching(self, _, algorithm):
   T, n, m, K, A = 100, 20, 10, 6, ['a', 'b']
   for _ in range(T):
     for k in range(1, K + 1):
       t, w = rand.random_word(n, A), rand.random_word(m, A)
       reference = [index for index in range(1, n + 1)
                    if self.check_hamming_distance(t, w, m, k, index)]
       self.check_get_all_matches(t, w, n, m, k, reference, algorithm)
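check_hamming_distance is not shown here; presumably it accepts a position when the text window starting there differs from w in at most k places. A sketch under that assumption (1-based indexing with a sentinel, hypothetical name):
def window_within_k_mismatches(t, w, m, k, index):
    # The window t[index .. index + m - 1] must fit inside the text and
    # differ from w[1..m] in at most k positions.
    if index + m - 1 >= len(t):
        return False
    mismatches = sum(t[index + j - 1] != w[j] for j in range(1, m + 1))
    return mismatches <= k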
Example #5
 def test_random_lcs(self, _, algorithm):
     T, n_1, n_2, A = 100, 7, 7, ['a', 'b']
     for _ in range(T):
         t_1, t_2 = rand.random_word(n_1, A), rand.random_word(n_2, A)
         self.check_lcs(
             t_1, t_2, n_1, n_2,
             distance.distance(t_1, t_2, n_1, n_2, distance.INDEL_DISTANCE),
             algorithm)
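The expected LCS length is passed in as the indel distance because the two are linked by |lcs(t_1, t_2)| = (n_1 + n_2 - indel(t_1, t_2)) / 2; check_lcs presumably applies that identity. For cross-checking, a naive DP for the LCS length itself (hypothetical helper, sentinel convention assumed):
def reference_lcs_length(t_1, t_2, n_1, n_2):
    # Classic O(n_1 * n_2) longest-common-subsequence table.
    table = [[0] * (n_2 + 1) for _ in range(n_1 + 1)]
    for i in range(1, n_1 + 1):
        for j in range(1, n_2 + 1):
            if t_1[i] == t_2[j]:
                table[i][j] = table[i - 1][j - 1] + 1
            else:
                table[i][j] = max(table[i - 1][j], table[i][j - 1])
    return table[n_1][n_2]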
Example #6
 def test_random_big(self, _, build, algorithm):
     T, n, m, A = 100, 1000, 100, ['a', 'b', 'c']
     for _ in range(T):
         t = random_word(n, A)
         W = {random_word(random.randint(2, 5), A) for _ in range(m)}
         reference = [(w[1:], i) for w in W
                      for i in forward.brute_force(t, w, n,
                                                   len(w) - 1)]
         self.check_all_matches(t, n, W, reference, build, algorithm)
 def test_big_texts(self):
     a_len, alphabet = 1000000, ['a', 'b', 'c']
     text_a = random_word(a_len, alphabet)
     text_b = text_a + random_word(100, alphabet)
     text_a = text_a + 'eeeeeeeeee'
     text_lcs = longest_common_substring(text_a, text_b,
                                         len(text_a) - 1,
                                         len(text_b) - 1)
     self.assertTrue(
         validate_lcs(text_a, text_b, text_lcs) and len(text_lcs) == a_len)
 def test_random_string_matching_with_edit_distance(self):
     T, n, m, A = 100, 300, 10, ['a', 'b']
     for _ in range(T):
         t, w = rand.random_word(n, A), rand.random_word(m, A)
         k = randrange(m - 1)
         reference = [
             index - 1 for index in range(1, n + 2)
             if check_subwords(t, w, m, k, index)
         ]
         self.check_get_all_matches_with_edit_distance(
             t, w, n, m, k, reference)
Example #9
  def test_random_lcp_lr_matching(self):
    T, n, A = 1000, 500, ['a', 'b']
    m, TT = 10, 10
    for _ in range(T):
      text = rand.random_word(n, A)
      sa = suffix_array.skew(text, n)
      lcp = suffix_array.lcp_kasai(sa, text, n)
      lcplr = lcp_lr.lcplr_from_lcp(lcp, n)

      for _ in range(TT):
        word = rand.random_word(m, A)
        reference = suffix_array.contains(sa, text, word, n, m)
        self.assertEqual(list(lcp_lr.contains_with_lcplr(
            sa, lcplr, text, word, n, m)), list(reference))
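suffix_array.contains serves as the reference oracle; a hedged sketch of a plain binary search that reports, in suffix-array order, the suffixes prefixed by the pattern (1-based suffix starts and the output convention are assumptions):
def contains_binary_search(sa, text, word, n, m):
    # sa holds 1-based suffix start positions of text[1..n];
    # report every suffix whose first m characters equal word[1..m].
    pattern = word[1:m + 1]
    def window(i):
        return text[sa[i]:sa[i] + m]
    low, high = 0, len(sa)
    while low < high:  # first suffix whose m-prefix is >= pattern
        middle = (low + high) // 2
        if window(middle) < pattern:
            low = middle + 1
        else:
            high = middle
    begin, high = low, len(sa)
    while low < high:  # first suffix whose m-prefix is > pattern
        middle = (low + high) // 2
        if window(middle) <= pattern:
            low = middle + 1
        else:
            high = middle
    yield from (sa[i] for i in range(begin, low))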
Example #10
    def test_random_small(self):
        n, m, A = 100, 25, ['a', 'b', 'c']
        for _ in range(100):
            t = random_word(n, A)
            patterns = {random_word(randint(2, 5), A) for _ in range(m)}
            cw_automat = commentz_walter_build(patterns)

            expected = set()
            for p in patterns:
                starts = brute_force(t, p, n, len(p) - 1)
                # set.union returns a new set; update mutates expected in place.
                expected.update((p, i) for i in starts)

            found = set(commentz_walter_search(t[1:], n, cw_automat))
            self.assertSetEqual(expected, found)
Example #11
    def test_myers_random_extensions(self):
        a_len, alphabet = 10000, ['a', 'b', 'c', 'd', 'e']
        text_a = random_word(a_len, alphabet)
        text_b = text_a
        diff = randint(50, 100)
        for _ in range(diff):
            place = randint(0, a_len - 1)
            extension = random_word(randint(1, 5), alphabet)
            text_b = text_b[:place] + extension + text_b[place:]

        text_lcs = longest_common_substring(text_a, text_b,
                                            len(text_a) - 1,
                                            len(text_b) - 1)
        self.assertTrue(
            validate_lcs(text_a, text_b, text_lcs) and len(text_lcs) == a_len)
Example #12
    def test_random(self):
        n, m, A = 500, 30, ['a', 'b', 'c']
        for _ in range(100):
            t = random_word(n, A)
            patterns = {random_word(randint(2, 10), A) for _ in range(m)}
            automaton = self._create_from(patterns)

            expected = set()
            for p in patterns:
                starts = brute_force(t, f'#{p}', n, len(p) + 1)
                # set.union returns a new set; update mutates expected in place.
                expected.update((p, i) for i in starts)

            found = set(find_occurrences(t, n, automaton))

            self.assertSetEqual(expected, found)
Example #13
    def test_random_small(self):
        n, m, A = 100, 25, ['a', 'b', 'c']
        for _ in range(100):
            t = random_word(n, A)
            patterns = {random_word(randint(2, 5), A) for _ in range(m)}
            cw_automat = fast_practical_multi_string_matching_build(patterns)

            expected = set()
            for p in patterns:
                starts = brute_force(t, p, n, len(p) - 1)
                # set.union returns a new set; update mutates expected in place.
                expected.update((p, i) for i in starts)

            found = set(
                fast_practical_multi_string_matching(t[1:], n, cw_automat))
            self.assertSetEqual(expected, found)
Example #14
 def test_small_words(self, _, algorithm, bound):
   tests, k, n_low, n_high, A = 100, 10, 1, 3, ['a', 'b', 'c']
   for _ in range(tests):
     T = [rand.random_word(random.randint(n_low, n_high), A)
          for _ in range(k)]
     self.check_shortest_common_superstring(
         T, shortest_common_superstring.exact(T), algorithm, bound)
Example #15
 def test_random_suffix_links(self):
   T, n, A = 100, 200, ['a', 'b']
   for _ in range(T):
     t = rand.random_word(n, A)
     reference = TestSuffixTrees.get_suffix_links(
         *suffix_tree.mccreight(t, n))
     self.check_suffix_links(t, n, reference)
Example #16
 def test_random_suffix_tree(self, _, algorithm, links):
     T, n, A = 100, 200, ['a', 'b']
     for _ in range(T):
         t = rand.random_word(n, A)
         reference = (suffix_tree.naive(t, n),
                      get_suffix_links(*suffix_tree.mccreight(t, n))[1])
         self.check_suffix_trees(t, n, reference, algorithm, links)
Example #17
 def test_random_lcp_array(self):
   T, n, A = 100, 500, ['a', 'b']
   for _ in range(T):
     t = rand.random_word(n, A)
     reference = suffix_array.lcp_from_suffix_array(
         suffix_array.prefix_doubling(t, n), t, n)
     self.check_lcp_array(t, n, reference)
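suffix_array.lcp_from_suffix_array is the reference; a direct character-by-character sketch of an LCP array (the convention that lcp[i] relates sa[i - 1] and sa[i], and the 1-based suffix starts, are assumptions):
def reference_lcp_array(sa, text, n):
    # lcp[i] = length of the longest common prefix of the suffixes
    # starting at sa[i - 1] and sa[i]; the first entry stays 0.
    lcp = [0] * len(sa)
    for i in range(1, len(sa)):
        a, b = sa[i - 1], sa[i]
        while (a + lcp[i] <= n and b + lcp[i] <= n
               and text[a + lcp[i]] == text[b + lcp[i]]):
            lcp[i] += 1
    return lcp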
 def test_random_words(self):
     T, alphabet = 100, ["a", "b", "c", "d"]
     for _ in range(T):
         amount = random.randint(5, 50)
         input_words = [rand.random_word(random.randint(20, 300), alphabet)
                        for _ in range(amount)]
         result = shortest_common_super_approx(input_words)
         self.check_superstring(result, input_words)
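check_superstring presumably verifies that every input word occurs in the result; a minimal sketch of such a containment check (hypothetical name, sentinel prefixes stripped defensively):
def is_common_superstring(result, words):
    # Every input word must appear in the result as a contiguous substring.
    return all(word.lstrip('#') in result.lstrip('#') for word in words)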
 def test_many_abc_small_words(self):
     T, alphabet = 1000, ["a", "b", "c"]
     for _ in range(T):
         amount = random.randint(2, 4)
         input_words = [rand.random_word(random.randint(1, 4), alphabet)
                        for _ in range(amount)]
         result = shortest_common_super_approx(input_words)
         self.check_superstring(result, input_words)
         self.check_length(result, input_words, alphabet)
Example #20
    def test_myers_random_lengths(self):
        iter_count, a_len, b_len = 10, randint(400, 1000), randint(400, 1000)
        alphabet = ['a', 'b', 'c']
        solutions = []
        for _ in range(iter_count):
            text_a = random_word(a_len, alphabet)
            text_b = random_word(b_len, alphabet)

            text_lcs = longest_common_substring(text_a, text_b,
                                                len(text_a) - 1,
                                                len(text_b) - 1)
            brute_lcs = brute_force_lcs(text_a, text_b,
                                        len(text_a) - 1,
                                        len(text_b) - 1)

            valid = (validate_lcs(text_a, text_b, text_lcs)
                     and len(text_lcs) == len(brute_lcs))

            solutions.append(valid)
        self.assertListEqual([True] * iter_count, solutions)
Example #21
def generate_random_words_set(l, m, A):
    S = set()
    for _ in range(m):
        t = rand.random_word(l, A)
        S.add(t)

    # Drop any word that occurs inside another, so the set is substring-free;
    # discard (rather than remove) tolerates dropping the same word twice.
    for w_1, w_2 in itertools.combinations(S, 2):
        gen = forward.knuth_morris_pratt(w_1, w_2, len(w_1) - 1, len(w_2) - 1)
        if next(gen, None) is not None:
            S.discard(w_2)
    return S
 def test_random_maximum_suffix(self):
   T, n, A = 100, 1000, ['a', 'b']
   for _ in range(T):
     t = rand.random_word(n, A)
     reference = maximum_suffix.naive(t, n)
     self.check_maximum_suffix(t, n, reference)
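maximum_suffix.naive acts as the reference; under the sentinel convention a brute-force version simply compares all suffixes (whether the result is the 1-based starting position or the suffix itself is an assumption):
def brute_force_maximum_suffix(t, n):
    # 1-based starting position of the lexicographically greatest suffix.
    return max(range(1, n + 1), key=lambda i: t[i:])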
Example #23
 def test_random_suffix_tree(self):
   T, n, A = 100, 200, ['a', 'b']
   for _ in range(T):
     t = rand.random_word(n, A)
     reference = suffix_tree.naive(t, n)
     self.check_suffix_trees(t, n, reference)
Example #24
 def test_random_suffix_array(self):
   T, n, A = 100, 500, ['a', 'b']
   for _ in range(T):
     t = rand.random_word(n, A)
     reference = suffix_array.naive(t, n)
     self.check_suffix_array(t, n, reference)
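suffix_array.naive is the reference; a one-line brute-force construction under the same conventions (1-based suffix starts, sentinel at index 0, no extra terminator entry assumed):
def brute_force_suffix_array(t, n):
    # Sort all suffix start positions by the suffixes they denote.
    return sorted(range(1, n + 1), key=lambda i: t[i:])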
 def test_random_recode_lempel_ziv_77(self):
     T, n, A = 100, 500, ['a', 'b']
     for _ in range(T):
         t = rand.random_word(n, A)
         self.check_recode_lempel_ziv_77(t, n)
 def test_random_boyer_moore_shift(self):
   T, n, A = 100, 100, ['a', 'b']
   for _ in range(T):
     t = rand.random_word(n, A)
     reference_result = suffix.boyer_moore_shift_brute_force(t, n)
     self.check_boyer_moore_shift(t, n, reference_result)
 def test_random_exact_string_matching(self):
     T, n, m, A = 100, 500, 10, ['a', 'b']
     for _ in range(T):
         t, w = rand.random_word(n, A), rand.random_word(m, A)
         reference = list(forward.brute_force(t, w, n, m))
         self.check_get_all_exact_matches(t, w, n, m, reference)
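forward.brute_force is the reference oracle throughout these tests; a sketch of such a matcher, assuming it yields the 1-based positions where w[1..m] occurs in t[1..n]:
def brute_force_matches(t, w, n, m):
    # Slide the pattern over the text and report every matching window.
    for i in range(1, n - m + 2):
        if t[i:i + m] == w[1:m + 1]:
            yield i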
 def test_random_critical_factorization(self):
     T, n, A = 100, 300, ['a', 'b']
     for _ in range(T):
         t = rand.random_word(n, A)
         reference = critical_factorization.naive_all(t, n)
         self.check_critical_factorization(t, n, reference)
 def test_random_string_lcs(self):
     T, n_1, n_2, A = 100, 100, 50, ['a', 'b']
     for _ in range(T):
         t_1, t_2 = rand.random_word(n_1, A), rand.random_word(n_2, A)
         self.check_linear_space_lcs(t_1, t_2, len(t_1) - 1, len(t_2) - 1)
Example #30
 def test_random_exact_string_matching(self):
     T, n, m, A = 100, 500, 10, ['a', 'b']
     for _ in range(T):
         t, w = rand.random_word(n, A), rand.random_word(m, A + ['?'])
         reference = list(dont_care.basic_fft(t, w, n, m))
         self.make_test(t, w, reference)
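dont_care.basic_fft matches with '?' as a single-character wildcard; a naive quadratic check under the same conventions, useful for small inputs (hypothetical name):
def naive_dont_care_matches(t, w, n, m):
    # '?' in the pattern matches any single text character.
    for i in range(1, n - m + 2):
        if all(w[j] == '?' or w[j] == t[i + j - 1] for j in range(1, m + 1)):
            yield i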