def _build_index(self, texts: Set[str], stop_words: Set[str]) -> bool:
    """Build the forward and inverted indexes for every text.

    Each text is segmented with ``cut_word``; the resulting items are
    de-duplicated and anything present in ``stop_words`` is removed.

    The results are stored on the instance:

    * ``self.index`` maps each text to the set of its remaining roots.
    * ``self.invert_index`` maps each root to the set of texts containing it.

    Args:
        texts: Texts to index.
        stop_words: Roots to exclude from both indexes.

    Returns:
        Always ``True``.
    """
    index, invert_index = defaultdict(set), defaultdict(set)
    for text in texts:
        roots = set(cut_word(text)) - stop_words
        # Touching index[text] registers the text even when every root was
        # filtered out, so the forward index still has an (empty) entry.
        index[text].update(roots)
        # Plain loop instead of a list comprehension evaluated only for its
        # side effects (which built and discarded a list of None values).
        for root in roots:
            invert_index[root].add(text)
    self.index, self.invert_index = index, invert_index
    return True
def parse_keys(keys: List[str]) -> List[str]:
    """Segment every key with ``cut_word`` and re-join its pieces with spaces.

    Args:
        keys: Keys to segment.

    Returns:
        A list, in the same order as ``keys``, where each entry is the
        output of ``cut_word`` for that key joined by a single space.
    """
    parsed = []
    for key in keys:
        pieces = cut_word(key)
        parsed.append(' '.join(pieces))
    return parsed
def get_counter(words: str):
    """Count how often each item produced by ``cut_word(words)`` occurs.

    Args:
        words: Text to segment with ``cut_word``.

    Returns:
        A ``Counter`` whose keys are the segmented items and whose values
        are the number of times each item appears.
    """
    counts = Counter()
    counts.update(cut_word(words))
    return counts