def build_cnter(query: dict, *, verbose=False) -> Counter[str]:
    """Tally the values found under one key across all parsed sentences.

    Args:
        query: A mapping holding exactly one item. The key selects which
            field is read (as ``d[key]``) from every element of every
            sentence; the value is used only as a label in the verbose
            report.
        verbose: When true, report the ten most common values and the
            number of distinct values after counting.

    Returns:
        A counter mapping each observed field value to its frequency.

    Raises:
        ValueError: If ``query`` does not contain exactly one item.
    """
    # The single-element destructuring doubles as validation: any number of
    # items other than one fails to unpack.
    [(tgt_key, tgt_val)] = query.items()

    cnter = Counter()
    for sentence in tqdm(mecab_into_sentences()):
        # update() accumulates in place. `cnter += Counter(...)` would build
        # a temporary Counter and then re-filter the whole accumulated
        # counter for non-positive entries on every sentence; the resulting
        # counts are the same because every tally here is at least 1.
        cnter.update(d[tgt_key] for d in sentence)

    if verbose:
        with Renderer(f"「{tgt_val}」の出現頻度") as out:
            out.header("上位 10 個")
            # The top-10 listing goes to stderr.
            pprint.pprint(cnter.most_common(10), stream=sys.stderr)
            out.result("種類", len(cnter))
    return cnter
https://nlp100.github.io/ja/ch04.html#33-aのb [Usage] python knock33.py """ import os import sys from typing import Dict, List from knock30 import mecab_into_sentences sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) from kiyuna.utils.message import Renderer # noqa: E402 isort:skip Morpheme = Dict[str, str] Sentence = List[Morpheme] if __name__ == "__main__": tgt = "AのB" res = [] for sentence in mecab_into_sentences(): for a, no, b in zip(sentence, sentence[1:], sentence[2:]): if (a["pos"], no["surface"], b["pos"]) == ("名詞", "の", "名詞"): res.append("".join(map(lambda x: x["surface"], (a, no, b)))) with Renderer(tgt) as out: out.result("数", len(res)) out.result("種類", len(set(res))) out.result("上から 10 個", res[:10])