Exemple #1
0
 def test_overlapping(self):
     # Keywords that are contained in one another ('he' inside 'she',
     # 'her' and 'hers') must each be reported. Matches are compared as
     # (keyword, 0-based start index) pairs, in the order they are yielded.
     keywords = ['he', 'she', 'his', 'her', 'hers']
     text = 'eshers'
     automaton = fast_practical_multi_string_matching_build(keywords)
     matches = [
         *fast_practical_multi_string_matching(text, len(text), automaton)]
     expected = [('she', 1), ('he', 2), ('her', 2), ('hers', 2)]
     self.assertListEqual(matches, expected)
Exemple #2
0
 def test_non_overlapping(self):
     # Three keywords whose occurrences in the text do not overlap; each
     # must be reported once as a (keyword, 0-based start index) pair.
     keywords = ['aa', 'bcc', 'bcab']
     text = 'aabccabcab'
     automaton = fast_practical_multi_string_matching_build(keywords)
     matches = [
         *fast_practical_multi_string_matching(text, len(text), automaton)]
     expected = [('aa', 0), ('bcc', 2), ('bcab', 6)]
     self.assertListEqual(matches, expected)
Exemple #3
0
 def test_small(self):
     # Small hand-checked case: only 'acb' and 'aba' occur in the text,
     # the latter four times at overlapping positions 5, 7, 9 and 11.
     keywords = ['cacbaa', 'acb', 'aba', 'acbab', 'ccbab']
     text = 'dacbaababababa'
     automaton = fast_practical_multi_string_matching_build(keywords)
     matches = [
         *fast_practical_multi_string_matching(text, len(text), automaton)]
     expected = [('acb', 1)] + [('aba', start) for start in (5, 7, 9, 11)]
     self.assertListEqual(matches, expected)
Exemple #4
0
    def test_random_small(self):
        # Randomised cross-check: for 100 random texts over {a, b, c}, the
        # set of (pattern, position) pairs reported by the multi-pattern
        # matcher must equal the set obtained by running brute_force once
        # per pattern.
        #
        # NOTE(review): brute_force is given len(p) - 1 as the pattern
        # length and the matcher is given t[1::], which suggests that
        # random_word prepends a sentinel character. Confirm that both
        # sides report patterns and positions in the same convention;
        # before the fix below this comparison never saw any expected
        # matches, so a mismatch would have gone unnoticed.
        n, m, A = 100, 25, ['a', 'b', 'c']
        for _ in range(100):
            t = random_word(n, A)
            patterns = {(random_word(randint(2, 5), A)) for _ in range(m)}
            cw_automat = fast_practical_multi_string_matching_build(patterns)

            expected = set()
            for p in patterns:
                starts = brute_force(t, p, n, len(p) - 1)
                # set.union() returns a new set and leaves `expected`
                # untouched, so the original code always compared against
                # an empty set. update() accumulates in place.
                expected.update((p, i) for i in starts)

            found = set(
                fast_practical_multi_string_matching(t[1::], n, cw_automat))
            self.assertSetEqual(expected, found)
Exemple #5
0
 def test_no_match(self):
     # Neither keyword occurs in the alternating text, so the matcher
     # must yield nothing.
     keywords = ['bb', 'abba']
     text = 'abababab'
     automaton = fast_practical_multi_string_matching_build(keywords)
     matches = [
         *fast_practical_multi_string_matching(text, len(text), automaton)]
     self.assertFalse(matches)
Exemple #6
0
 def test_pessimistic(self):
     # Keywords 'a', 'aa', ..., 'aaaaa' against a text of twenty 'a's:
     # a keyword of length k occurs at 20 - k + 1 positions, so the total
     # number of reported matches is 20 + 19 + 18 + 17 + 16.
     keywords = ['a' * length for length in range(1, 6)]
     text = 'a' * 20
     automaton = fast_practical_multi_string_matching_build(keywords)
     matches = [
         *fast_practical_multi_string_matching(text, len(text), automaton)]
     expected_count = sum(len(text) - len(kw) + 1 for kw in keywords)
     self.assertEqual(len(matches), expected_count)
Exemple #7
0
 def test_single_keyword(self):
     # With a single keyword the matcher must still report every
     # occurrence as a (keyword, 0-based start index) pair.
     text = 'aabccabcab'
     automaton = fast_practical_multi_string_matching_build(['abc'])
     matches = [
         *fast_practical_multi_string_matching(text, len(text), automaton)]
     expected = [('abc', start) for start in (1, 5)]
     self.assertListEqual(matches, expected)