def test_NRPattern(self):
    """Check exact-match lookups on a trie built from ``NRPattern``.

    The trie is built from the (in-place sorted) module-level ``NRPattern``
    list.  The patterns "BCD", "BBCD", "BG" and "DG" must be found, while
    "CD" must not; this test treats ``-1`` from ``exact_match_search`` as
    the "not found" result.
    """
    trie = DoubleArrayTrie()
    # Sorts the shared list in place before building.
    # NOTE(review): presumably build() needs sorted keys -- confirm.
    NRPattern.sort()
    trie.build(key=NRPattern)

    # assertNotEqual/assertEqual report the offending value on failure,
    # unlike assertTrue(a != b), which only reports "False is not true".
    for pattern in ("BCD", "BBCD", "BG", "DG"):
        self.assertNotEqual(
            trie.exact_match_search(pattern),
            -1,
            "pattern %r should be present in the trie" % pattern,
        )

    self.assertEqual(
        trie.exact_match_search("CD"),
        -1,
        "pattern 'CD' should not be present in the trie",
    )
def test_combin_by_dict(self):
    """Check that ``combine_by_custom_dict`` merges vertices via a custom dictionary.

    A word net is generated for a four-character text using a trie of the
    four single characters, and the first entry of every row in
    ``word_net.vertexs`` is collected.  Those vertices are then combined
    using a second trie whose only key is the full four-character word;
    the result is expected to contain exactly three vertices.
    """
    dat = DoubleArrayTrie()
    # Every key is a unicode literal so that all keys share one type with
    # ``text`` (a plain "..." literal is a byte string on Python 2).
    dat.build([u"江", u"河", u"湖", u"海"])

    text = u"江河湖海"
    word_net = WordNet(text)
    gen_word_net(text, word_net, dat)

    # Keep only the first vertex of each row of the word net.
    vertexs = [v[0] for v in word_net.vertexs]
    # Assertion message reads: "custom dictionary segmentation".
    # NOTE(review): 6 is presumably the 4 characters plus a begin and an
    # end row -- confirm against WordNet.__len__.
    self.assertEqual(len(word_net), 6, u"自定义字典分词")

    combin_dat = DoubleArrayTrie()
    combin_dat.build(key=[u"江河湖海"], v=[u"江河湖海 n 1"])
    vertexs = combine_by_custom_dict(vertexs, combin_dat)
    # Assertion message reads: "after merging only the head, the tail and
    # the middle word should remain".
    self.assertEqual(len(vertexs), 3, u"合并完成后应该只有前尾加中间词")
class NSPatternDict:
    """Holds a ``DoubleArrayTrie`` built from the module-level ``NSPattern`` keys."""

    def __init__(self):
        """Sort ``NSPattern`` in place and build ``self.trie`` from it."""
        # The shared pattern list is sorted in place, then used as the keys.
        NSPattern.sort()
        pattern_trie = DoubleArrayTrie()
        pattern_trie.build(key=NSPattern)
        self.trie = pattern_trie