Exemple #1
0
 def test_ignore_case_replace_longest(self):
     """Check ``replace_longest`` on a trie built with ``ignore_case=True``.

     Three keys containing dotted-I characters are inserted, then the same
     text is replaced twice: once with no separator set and once with
     space as the only separator. Each result is compared with its
     expected string.
     """
     if sys.version_info.major < 3:
         # Report the test as skipped instead of returning silently, which
         # the runner would otherwise count as a pass.
         self.skipTest("not exercised on Python 2")
     trie = Trie(ignore_case=True)
     ids = {w: trie.insert(w) for w in [u"aİİ", u"aai̇", u"aai̇bİ"]}
     # Map each id returned by insert() to the text that should replace it.
     replaced = {
         ids[u"aİİ"]: u"a",
         ids[u"aai̇"]: u"b",
         ids[u"aai̇bİ"]: u"c",
     }
     res = trie.replace_longest(u"aaİ aai̇bİaa",
                                lambda x, start, end: replaced[x])
     self.assertEqual(res, u"b caa")
     sep = {ord(" ")}  # space as separator
     res = trie.replace_longest(u"aaİ aai̇bİaa",
                                lambda x, start, end: replaced[x], sep)
     self.assertEqual(res, u"b aai̇bİaa")
Exemple #2
0
 def test_pickle_trie(self):
     """Check that a trie still replaces correctly after a pickle round trip.

     The ids are collected from ``insert`` before pickling and are used to
     look up replacements on the unpickled trie, so the test also asserts
     that those ids remain valid after loading.
     """
     import tempfile

     trie = Trie(ignore_case=True)
     ids = {w: trie.insert(w) for w in [u"aİİ", u"aai̇", u"aai̇bİ"]}
     # Use an anonymous temporary file rather than a fixed "trie.pkl" in the
     # working directory, so no artifact is left behind and concurrent runs
     # cannot overwrite each other's data.
     with tempfile.TemporaryFile() as fh:
         pickle.dump(trie, fh)
         fh.seek(0)  # rewind so load() reads what dump() just wrote
         trie = pickle.load(fh)
     # Map each pre-pickle id to the text that should replace it.
     replaced = {
         ids[u"aİİ"]: u"a",
         ids[u"aai̇"]: u"b",
         ids[u"aai̇bİ"]: u"c",
     }
     res = trie.replace_longest(u"aaİ aai̇bİaa",
                                lambda x, start, end: replaced[x])
     self.assertEqual(res, u"b caa")
     sep = {ord(" ")}  # space as separator
     res = trie.replace_longest(u"aaİ aai̇bİaa",
                                lambda x, start, end: replaced[x], sep)
     self.assertEqual(res, u"b aai̇bİaa")
Exemple #3
0
    def test_replace_longest(self):
        """Replace place-name phrases in a sentence and verify the output.

        ``replace_longest`` is run on the same sentence twice: first with no
        separator set, then with space as the only separator. Each result is
        compared with its expected string.
        """
        # (phrase to insert, text that should replace it), in insertion order.
        substitutions = (
            (u"New York", u"Beijing"),
            (u"New", u"Old"),
            (u"York", u"Yark"),
            (u"York City", u"Yerk Town"),
            (u"City", u"Country"),
            (u"City is", u"Province are"),
        )
        trie = Trie()
        # Key the replacement text by the id that insert() returns.
        replaced = {}
        for phrase, substitute in substitutions:
            replaced[trie.insert(phrase)] = substitute

        def lookup(key_id, start, end):
            # Only the id is needed; the match offsets are ignored.
            return replaced[key_id]

        text = u"New York  City isA"

        without_sep = trie.replace_longest(text, lookup)
        self.assertEqual(without_sep, u"Beijing  Province areA")

        space_only = {ord(" ")}  # space as separator
        with_sep = trie.replace_longest(text, lookup, space_only)
        self.assertEqual(with_sep, u"Beijing  Country isA")
Exemple #4
0
 def test_replace_words(self):
     """Insert every word of the bench word list and replace them in bulk.

     Each non-empty, stripped line of ``../bench/words.txt`` (relative to
     this file) is inserted into a fresh trie. The full file text is then
     passed through ``replace_longest`` with newline as the only separator
     and a callback that returns ``str`` of the matched id. The stripped
     result must equal the inserted ids joined by newlines.
     """
     import io

     path = os.path.join(os.path.dirname(__file__), "../bench/words.txt")
     # Decode explicitly as UTF-8 so the result does not depend on the
     # locale's default encoding; io.open returns text on Python 2 and 3.
     with io.open(path, encoding="utf8") as fi:
         txt = fi.read()

     trie = Trie()
     ids = []
     # Split on "\n" only, matching line-by-line iteration of a text file.
     for line in txt.split("\n"):
         word = line.strip()
         if word:
             ids.append(trie.insert(word))

     sep = {ord("\n")}
     ret = trie.replace_longest(txt, lambda v, start, end: str(v),
                                sep).strip()
     self.assertEqual(ret, "\n".join([str(i) for i in ids]))