def test_search_all_patterns(self):
    """Verify the index reported for each match of ``search_all_patterns``.

    Every ``(pattern, index)`` pair yielded for the text must name a known
    pattern and carry the index recorded for it in the expectation table.
    The expected indices are the character positions at which each pattern
    begins in the text.
    """
    sentence = "new york city"
    expected_start = {
        "new york": 0,
        "york city": 4,
        "new york city": 0,
    }
    searcher = TrieSearch(patterns=["new york", "york city", "new york city"])

    # NOTE: only yielded matches are validated; an empty result would not
    # make this test fail.
    for found, start in searcher.search_all_patterns(sentence):
        self.assertIn(found, expected_start)
        self.assertEqual(start, expected_start[found])
def test_search_longest_patterns_jp(self):
    """Verify ``search_longest_patterns`` on Japanese text with an empty splitter.

    Three overlapping patterns are registered, but the expectation table
    holds only the longest one (at index 0); any yielded pattern that is
    not in the table fails the test.
    """
    sentence = "ニューヨークシティ"
    expected_start = {"ニューヨークシティ": 0}
    # splitter="" -- presumably makes the trie tokenize per character, which
    # text without spaces would need; confirm against TrieSearch.
    searcher = TrieSearch(
        patterns=["ニューヨーク", "ヨークシティ", "ニューヨークシティ"],
        splitter="",
    )

    # NOTE: only yielded matches are validated; an empty result would not
    # make this test fail.
    matches = searcher.search_longest_patterns(sentence)
    for found, start in matches:
        self.assertIn(found, expected_start)
        self.assertEqual(start, expected_start[found])
def test_search_all_patterns(self):
    """Check each ``(pattern, index)`` pair produced by ``search_all_patterns``.

    The overlapping patterns are searched in a text that contains all of
    them; each yielded pattern must be a key of the expectation table and
    its index must equal the stored value (the character position where
    the pattern begins in the text).
    """
    keywords = ['new york', 'york city', 'new york city']
    haystack = 'new york city'
    position_of = {
        'new york': 0,
        'york city': 4,
        'new york city': 0,
    }

    finder = TrieSearch(patterns=keywords)
    # NOTE: assertions run once per yielded match, so a search that yields
    # nothing passes silently.
    for hit in finder.search_all_patterns(haystack):
        keyword, position = hit
        self.assertIn(keyword, position_of)
        self.assertEqual(position, position_of[keyword])
def test_search_longest_patterns_jp(self):
    """Check ``search_longest_patterns`` against Japanese input.

    The trie is built with an empty splitter from three overlapping
    patterns. Only the longest pattern, at index 0, is listed as an
    acceptable result, so yielding either shorter pattern fails the test.
    """
    keywords = ['ニューヨーク', 'ヨークシティ', 'ニューヨークシティ']
    haystack = 'ニューヨークシティ'
    position_of = {'ニューヨークシティ': 0}

    # NOTE(review): the empty splitter presumably switches matching to
    # character granularity -- confirm against TrieSearch.
    finder = TrieSearch(patterns=keywords, splitter='')
    # NOTE: assertions run once per yielded match, so a search that yields
    # nothing passes silently.
    for hit in finder.search_longest_patterns(haystack):
        keyword, position = hit
        self.assertIn(keyword, position_of)
        self.assertEqual(position, position_of[keyword])
def test_search_regexp_splitter(self):
    """Verify ``search_all_patterns`` when the splitter is a regular expression.

    Each pattern occurs twice in the text, so the expectation table maps a
    pattern to the *set* of acceptable indices (the character positions at
    which the pattern begins). Every yielded ``(pattern, index)`` pair must
    name a known pattern and report one of its listed indices.
    """
    patterns = [
        "new york",
        "york city",
    ]
    text = "new york, york city and some new york city here"
    # Raw string: the previous f-string had no placeholders and relied on
    # the invalid escape sequences "\w" / "\d" being passed through, which
    # triggers a warning on current Python versions. The value is unchanged.
    splitter = r"[\w\d]+"
    answers = {"new york": {0, 29}, "york city": {10, 33}}
    trie = TrieSearch(patterns=patterns, splitter=splitter)

    # NOTE: only yielded matches are validated; an empty result would not
    # make this test fail.
    for ptn, idx in trie.search_all_patterns(text):
        self.assertIn(ptn, answers)
        self.assertIn(idx, answers[ptn])