def __init__(self, rfpath, max_len=4):
    """Load the text behind ``rfpath`` and build the prefix/suffix tries.

    Args:
        rfpath: Handed unchanged to ``Cleaner.preprocess_text``; presumably
            the path of the raw corpus file (confirm against ``Cleaner``).
        max_len: Longest word length, in characters, to consider. Stored
            on the instance as ``max_len + 1``.
    """
    # Counting words of n characters requires (n+1)-grams, hence the +1.
    self.max_len = max_len + 1

    self.prefixTree = Trie()
    self.suffixTree = Trie(direction='suffix')
    self.vocabulary = []
    self.len_dict = {}

    self.buildTreesAndDics(Cleaner.preprocess_text(rfpath))

    # Entropies are computed only after the tries have been populated.
    for tree in (self.prefixTree, self.suffixTree):
        tree.set_entropy()

    # Starts out empty; nothing in this method fills it.
    self.words = {}
def __init__(self, rfpath=None, text=None, max_len=4):
    """Build the prefix/suffix tries from a file or from in-memory text.

    Exactly one source is needed. When both are supplied, ``rfpath`` takes
    precedence and ``text`` is ignored.

    Args:
        rfpath: If not ``None``, handed to ``Cleaner.preprocess_text`` to
            obtain the text; presumably a path to the raw corpus file
            (confirm against ``Cleaner``).
        text: Text used directly (without passing through ``Cleaner``)
            when ``rfpath`` is ``None``.
        max_len: Longest word length, in characters, to consider. Stored
            on the instance as ``max_len + 1``.

    Raises:
        ValueError: If both ``rfpath`` and ``text`` are ``None``.
    """
    # Fail fast, before any trie is allocated, and say what is missing.
    if rfpath is None and text is None:
        raise ValueError("either 'rfpath' or 'text' must be provided")

    self.prefixTree = Trie()
    self.suffixTree = Trie(direction='suffix')
    self.vocabulary = []
    self.len_dict = dict()
    # Counting words of n characters requires (n+1)-grams, hence the +1.
    self.max_len = max_len + 1

    if rfpath is not None:
        # A file source wins over any text passed alongside it.
        text = Cleaner.preprocess_text(rfpath)

    self.buildTreesAndDics(text)
    self.prefixTree.set_entropy()
    self.suffixTree.set_entropy()
    # Starts out empty; nothing in this method fills it.
    self.words = dict()