verbframedict[verbbase].append( (adp, chunks[i].show_only_words())) noun = set() verb = set() return verbframedict if __name__ == "__main__": with open("ai.ja1.txt.parsed", "r") as ai: ai = ai.readlines() ai_morphs = [] for i in range(len(ai)): ai_morphs.append(Morph(ai[i])) sents = morph2sents(ai_morphs) frames = defaultdict(list) for sent in sents: dep = morph2chunk(sent) verbframe2(dep, frames) #コーパス中で頻出する述語と格パターンの組み合わせ for key, value in sorted(frames.items(), key=lambda x: len(x[1]), reverse=True)[:10]: output = defaultdict(list) for elem in value: output[elem[0]].append(elem[1]) print(key, end="\t") print(" ".join(output.keys()), end="\t") print(" ".join([" ".join(elem) for elem in output.values()]))
from collections import defaultdict from knock40 import Morph, morph2sents from knock41 import Chunk, morph2chunk, sources if __name__ == "__main__": with open("ai.ja1.txt.parsed", "r") as ai: ai = ai.readlines() ai_morphs = [] for i in range(len(ai)): ai_morphs.append(Morph(ai[i])) sents = morph2sents(ai_morphs) for sent in sents: chunks = morph2chunk(sent) sources(chunks) for i in range(len(chunks)): #print(chunk.meta) #print(chunk.show_bunsetsu_tag()) if "サ変接続" in chunks[i].show_morph_pos1(): if "を" in chunks[i].show_only_listwords(): print(chunks[i].show_only_words(), end="") goto = chunks[i].dst verb = chunks[goto].show_base_for_X("動詞") adpos = set() dep = [] if verb != None: if chunks[goto].srcs != []: for head_id in chunks[goto].srcs: adp = chunks[head_id].show_base_for_X("助詞") if adp != None and head_id != i:
def noun2verb(chunks):
    """Print each noun-bearing chunk next to the verb-bearing chunk it depends on.

    For every chunk that has a morpheme whose ``pos`` equals "名詞" and whose
    head chunk (``chunks[chunk.dst]``) has a morpheme whose ``pos`` equals
    "動詞", one line is written to stdout: the dependent chunk's
    ``show_only_words()``, a tab, then the head chunk's ``show_only_words()``.

    Args:
        chunks: Sequence of chunk objects. Each one must provide ``morphs``
            (an iterable of objects with a ``pos`` attribute), ``dst`` (used
            as an index into ``chunks``) and ``show_only_words()``.

    Returns:
        None. The result is only printed.
    """
    for chunk in chunks:
        if not any(morph.pos == "名詞" for morph in chunk.morphs):
            continue

        head_index = chunk.dst
        # A chunk without a head must be skipped: a negative index would not
        # fail but silently wrap around to the end of the list, pairing the
        # chunk with an unrelated one (or with itself).
        # NOTE(review): presumably the root chunk carries dst == -1, as in
        # CaboCha output -- confirm against the Chunk class.
        if head_index is None or head_index < 0:
            continue

        head = chunks[head_index]
        if any(morph.pos == "動詞" for morph in head.morphs):
            print(chunk.show_only_words(), head.show_only_words(), sep="\t")


if __name__ == "__main__":
    # NOTE(review): no encoding is passed, so decoding follows the locale
    # default -- confirm the corpus encoding before pinning one.
    with open("ai.ja1.txt.parsed", "r") as parsed_file:
        parsed_lines = parsed_file.readlines()
    ai_morphs = [Morph(parsed_line) for parsed_line in parsed_lines]

    # Only the sentence at index 1 is analysed.
    dep = morph2chunk(morph2sents(ai_morphs)[1])
    noun2verb(dep)