# 33. Sa-hen nouns
# Extract every noun of the sa-hen connection type ("サ変接続").
from knock_30 import parse
from itertools import chain, islice

# Flatten the per-sentence results of parse() into one stream of morphemes.
morphemes = chain.from_iterable(parse())

# Keep the base form of each morpheme whose 'pos1' field is "サ変接続";
# the set removes duplicates.
sa_irregular_nouns = {
    morpheme['base']
    for morpheme in morphemes
    if morpheme['pos1'] == "サ変接続"
}

# Print at most ten entries. Sets are unordered, so which ten is arbitrary.
for noun in islice(sa_irregular_nouns, 10):
    print(noun)
# 32. Base forms of verbs
# Extract the base form of every verb.
from knock_30 import parse
from itertools import chain, islice

# Flatten the per-sentence results of parse() into one stream of morphemes.
morphemes = chain.from_iterable(parse())

# Keep the base form of each morpheme whose 'pos' field is "動詞" (verb);
# the set removes duplicates.
verb_bases = {
    morpheme['base']
    for morpheme in morphemes
    if morpheme['pos'] == "動詞"
}

# Print at most ten entries. Sets are unordered, so which ten is arbitrary.
for base in islice(verb_bases, 10):
    print(base)
# 34. "A no B"
# Extract noun phrases in which two nouns are joined by the particle "の".
from knock_30 import parse
from itertools import chain, islice

noun_phrases = set()
for sent in parse():
    # Slide a three-morpheme window (left, middle, right) over the sentence.
    # Sentences shorter than three morphemes produce no windows.
    windows = zip(sent, islice(sent, 1, None), islice(sent, 2, None))
    for left, middle, right in windows:
        # Both outer morphemes must be nouns and the middle surface must be
        # exactly "の".
        if (left['pos'] == '名詞' and right['pos'] == '名詞'
                and middle['surface'] == 'の'):
            noun_phrases.add(f"{left['surface']}の{right['surface']}")

# Print at most ten phrases. Sets are unordered, so which ten is arbitrary.
for phrase in islice(noun_phrases, 10):
    print(phrase)
def count():
    """Count how often each base form occurs in the output of ``parse()``.

    A base form is skipped when its first character is whitespace or lies in
    the code-point range ``'!'`` through ``'〿'``. Only the first character is
    tested, because the pattern is applied with ``match``. Note that this
    range also covers ASCII letters and digits, not just punctuation.

    Returns:
        A ``Counter`` mapping each retained base form to its occurrence count.
    """
    skip_pattern = re.compile(r'[!-〿]|\s')

    word_count = Counter()
    for word in chain.from_iterable(parse()):
        base = word['base']
        if skip_pattern.match(base) is None:
            word_count[base] += 1
    return word_count
# 31. Verbs
# Extract the surface form of every verb.
from knock_30 import parse
from itertools import chain, islice

# Flatten the per-sentence results of parse() into one stream of morphemes.
morphemes = chain.from_iterable(parse())

# Keep the surface form of each morpheme whose 'pos' field is "動詞" (verb);
# the set removes duplicates.
verb_surfaces = {
    morpheme['surface']
    for morpheme in morphemes
    if morpheme['pos'] == "動詞"
}

# Print at most ten entries. Sets are unordered, so which ten is arbitrary.
for surface in islice(verb_surfaces, 10):
    print(surface)