Beispiel #1
0
 def test_uses_yield(self):
     # The result of scan() must be a generator object, i.e. the scan is
     # expected to be implemented lazily rather than returning a built list.
     from types import GeneratorType
     dictionary = in3120.Trie()
     dictionary.add(["foo"], self.__tokenizer)
     result = in3120.StringFinder(dictionary, self.__tokenizer).scan("the foo bar")
     self.assertIsInstance(result, GeneratorType, "Are you using yield?")
Beispiel #2
0
def repl_b_2():
    """Run a REPL that reports which parts of the entered text are MeSH terms.

    A trie is filled from the "body" field of every document in the
    ``mesh.txt`` corpus. Each text passed in by ``simple_repl`` is then scanned
    against that trie with a ``StringFinder`` and the matches are returned as
    a list. Dictionary entries and entered text both go through the same
    canonicalize-then-normalize step before use.
    """
    print("Building trie from MeSH corpus...")
    normalizer = in3120.BrainDeadNormalizer()
    tokenizer = in3120.BrainDeadTokenizer()

    def clean(text):
        # Shared preprocessing for both dictionary entries and entered text.
        canonical = normalizer.canonicalize(text)
        return normalizer.normalize(canonical)

    corpus = in3120.InMemoryCorpus(data_path("mesh.txt"))
    dictionary = in3120.Trie()
    # Generator expression: documents are cleaned lazily as the trie consumes them.
    dictionary.add((clean(doc["body"]) for doc in corpus), tokenizer)
    engine = in3120.StringFinder(dictionary, tokenizer)
    print("Enter some text and locate words and phrases that are MeSH terms.")

    def locate(text):
        # Materialize the scan results into a list before returning them.
        return list(engine.scan(clean(text)))

    simple_repl("text", locate)
Beispiel #3
0
 def test_access_nodes(self):
     # Step through the trie with consume() and check which nodes are final.
     tokenizer = in3120.BrainDeadTokenizer()
     root = in3120.Trie()
     words = ["abba", "ørret", "abb", "abbab", "abbor"]
     root.add(words, tokenizer)
     # The root itself is not final, and an unknown prefix yields None.
     self.assertFalse(root.is_final())
     self.assertIsNone(root.consume("snegle"))
     # "ab" is only a prefix of the added strings, so its node is not final.
     prefix_node = root.consume("ab")
     self.assertFalse(prefix_node.is_final())
     # Consuming one more "b" lands on "abb", which was added as an entry.
     word_node = prefix_node.consume("b")
     self.assertTrue(word_node.is_final())
     # Stepwise consumption must reach the same node as consuming "abb" at once.
     self.assertEqual(word_node, root.consume("abb"))
Beispiel #4
0
 def test_mesh_terms_in_cran_corpus(self):
     # Build a trie from the MeSH document bodies and check the matches found
     # in a few selected Cranfield documents.
     mesh = in3120.InMemoryCorpus("../data/mesh.txt")
     cran = in3120.InMemoryCorpus("../data/cran.xml")
     trie = in3120.Trie()
     # A missing/empty "body" is replaced by the empty string before adding.
     trie.add((d["body"] or "" for d in mesh), self.__tokenizer)
     finder = in3120.StringFinder(trie, self.__tokenizer)
     # (document index in cran, expected matches) pairs, checked in this order.
     expectations = (
         (0, ["wing", "wing"]),
         (3, ["solutions", "skin", "friction"]),
         (1254, ["electrons", "ions"]),
     )
     for doc_index, expected in expectations:
         self.__scan_buffer_verify_matches(finder, cran[doc_index]["body"],
                                           expected)
Beispiel #5
0
 def test_scan(self):
     # Scan several buffers against a small dictionary and verify the matches.
     dictionary = in3120.Trie()
     entries = [
         "romerike", "apple computer", "norsk", "norsk ørret", "sverige",
         "ørret", "banan", "a", "a b"
     ]
     dictionary.add(entries, self.__tokenizer)
     finder = in3120.StringFinder(dictionary, self.__tokenizer)
     # (buffer, expected matches) pairs, checked in this order.
     cases = [
         ("en norsk     ørret fra romerike likte abba fra sverige",
          ["norsk", "norsk ørret", "ørret", "romerike", "sverige"]),
         ("the apple is red", []),
         ("", []),
         ("apple computer banan foo sverige ben reddik fy fasan",
          ["apple computer", "banan", "sverige"]),
         ("a a b", ["a", "a", "a b"]),
     ]
     for buffer, expected in cases:
         self.__scan_buffer_verify_matches(finder, buffer, expected)