def _test_persist(self):
    """Round-trip a small TrieDict through save()/load() and print both copies.

    Builds a trie from two fixed patterns, prints it and its to_string()
    form, saves it to "test.triedict" in the current working directory,
    loads it back and prints the reloaded copy the same way so the two
    dumps can be compared by eye. Nothing is asserted.
    """
    path = "test.triedict"

    original = TrieDict()
    for pattern in ("blaaaa", "blauu"):
        original.add_pattern(pattern)
    # NOTE(review): create_triedict() in this file calls
    # generate_suffix_pointers() instead -- confirm which name TrieDict
    # actually provides.
    original.generate_suffix_links()

    # A parenthesised single argument prints identically under the
    # Python 2 print statement.
    print(original)
    print(original.to_string())

    original.save(path)
    restored = TrieDict.load(path)

    print(restored)
    print(restored.to_string())
def create_triedict(fn_name2IDs, fn_output, max_n=-1):
    """Build a TrieDict from a tab-separated name/IDs file and save it.

    Each input line must consist of exactly two tab-separated fields; the
    first is the name, the second is read but not used here. The name is
    decoded from UTF-8 and added to the trie together with the zero-based
    index of its line. Progress is reported on stderr.

    Args:
        fn_name2IDs: Path of the input file to read.
        fn_output: Path passed to TrieDict.save() for the finished trie.
        max_n: If greater than zero, only the first max_n lines are read;
            otherwise the whole file is processed.

    Raises:
        ValueError: If a stripped line does not split into exactly two
            tab-separated fields (this includes blank lines).
    """
    td = TrieDict()

    # The context manager guarantees the input file is closed, even when a
    # malformed line raises part-way through the loop.
    with open(fn_name2IDs, "r") as fp_name2IDs:
        sys.stderr.write("creating trie...\n")
        for i, line in enumerate(fp_name2IDs):
            if max_n > 0 and i >= max_n:
                break
            if i % 1000 == 0:
                # "\r" rewrites the same terminal line with the counter.
                sys.stderr.write("\r%d" % i)
                sys.stderr.flush()
            # Unpacking into two names doubles as a format check.
            name, _ids = line.strip().split("\t")
            # Lines are byte strings here (Python 2 text-mode read), so the
            # name is decoded to unicode before insertion.
            name = name.decode("utf-8")
            td.add_pattern(name, i)

    sys.stderr.write("\ncreating suffix pointers...\n")
    td.generate_suffix_pointers()

    sys.stderr.write("\nsaving trie...\n")
    td.save(fn_output)
    sys.stderr.write("done\n")