Esempio n. 1
0
    def __init__(self, rfpath, max_len=4):
        """Initialise the tries and lookup tables from a preprocessed text.

        The text is obtained from ``Cleaner.preprocess_text(rfpath)``, fed to
        ``self.buildTreesAndDics`` and then ``set_entropy()`` is called on the
        prefix trie followed by the suffix trie.

        Args:
            rfpath: Forwarded unchanged to ``Cleaner.preprocess_text``
                (presumably the path of the corpus file -- confirm against
                ``Cleaner``).
            max_len: Maximum word length, in characters, to consider.
                Stored internally as ``max_len + 1``.
        """
        # Two tries over the same text: the default one and a 'suffix' one.
        self.prefixTree = Trie()
        self.suffixTree = Trie(direction='suffix')

        # Empty containers; nothing in this method fills them directly.
        self.vocabulary = list()
        self.len_dict = {}

        # Counting words of n characters requires (n+1)-grams, hence the
        # stored limit is one larger than the requested length.
        self.max_len = max_len + 1

        corpus = Cleaner.preprocess_text(rfpath)
        self.buildTreesAndDics(corpus)

        # Same call order as before: prefix trie first, then suffix trie.
        for trie in (self.prefixTree, self.suffixTree):
            trie.set_entropy()

        # Reset last, after the build step has run.
        self.words = {}
Esempio n. 2
0
    def __init__(self, rfpath=None, text=None, max_len=4):
        """Initialise the tries and lookup tables from a file or a given text.

        When ``rfpath`` is provided, the text comes from
        ``Cleaner.preprocess_text(rfpath)`` and any ``text`` argument is
        ignored. Otherwise ``text`` is passed directly to
        ``self.buildTreesAndDics``. Afterwards ``set_entropy()`` is called on
        both tries.

        Args:
            rfpath: Forwarded unchanged to ``Cleaner.preprocess_text``
                (presumably the path of the corpus file -- confirm against
                ``Cleaner``). Takes precedence over ``text``.
            text: Text to build from when ``rfpath`` is ``None``.
            max_len: Maximum word length, in characters, to consider.
                Stored internally as ``max_len + 1``.

        Raises:
            ValueError: If both ``rfpath`` and ``text`` are ``None``.
        """
        # Fail fast, before any trie is allocated, and say what is missing
        # instead of raising an empty ValueError.
        if rfpath is None and text is None:
            raise ValueError(
                "no input given: pass either 'rfpath' or 'text'"
            )

        self.prefixTree = Trie()
        self.suffixTree = Trie(direction='suffix')

        self.vocabulary = []
        self.len_dict = dict()
        # Counting words of n characters requires (n+1)-grams, hence the
        # stored limit is one larger than the requested length.
        self.max_len = max_len + 1

        # A file path, when present, overrides any directly supplied text.
        if rfpath is not None:
            text = Cleaner.preprocess_text(rfpath)

        self.buildTreesAndDics(text)
        self.prefixTree.set_entropy()
        self.suffixTree.set_entropy()

        self.words = dict()