예제 #1
0
 def _build_index(self, texts: Set[str], stop_words: Set[str]) -> bool:
     """
     Build the forward index and the inverted index for all keywords.

     Each text is split with ``cut_word``; the resulting tokens, minus
     ``stop_words``, become that text's "roots".

     * ``self.index`` maps each text to the set of its roots.
     * ``self.invert_index`` maps each root to the set of texts containing it.

     Both attributes are replaced with freshly built ``defaultdict(set)``
     instances on every call.

     :param texts: the texts (keywords) to index.
     :param stop_words: tokens to exclude from both indexes.
     :return: always ``True``.
     """
     index, invert_index = defaultdict(set), defaultdict(set)
     for text in texts:
         roots = set(cut_word(text)) - stop_words
         # Touching index[text] registers the text even when no roots remain.
         index[text].update(roots)
         # Explicit loop: the inverted index is filled by side effect, so a
         # list comprehension would only allocate a throwaway list.
         for root in roots:
             invert_index[root].add(text)
     self.index, self.invert_index = index, invert_index
     return True
예제 #2
0
def parse_keys(keys: List[str]) -> List[str]:
    """
    Segment every key with ``cut_word`` and re-join its pieces with one space.

    :param keys: the keys to process, in order.
    :return: a new list with one space-joined string per input key.
    """
    joined_keys = []
    for key in keys:
        pieces = cut_word(key)
        joined_keys.append(' '.join(pieces))
    return joined_keys
예제 #3
0
def get_counter(words: str):
    """
    Count how often each token produced by ``cut_word(words)`` occurs.

    :param words: the input passed to ``cut_word``.
    :return: a ``Counter`` whose keys are the tokens and whose values are
        the number of times each token appears.
    """
    tally = Counter()
    tally.update(cut_word(words))
    return tally