コード例 #1
0
def extraction_pos1(list_srcs_dst, pos_1, pos_2):
    """Group the ``pos_2`` items of each sentence under their ``pos_1`` morpheme.

    For every pair in a sentence, ``pair[1].check_pos_morph(pos_1)`` yields
    the grouping key and ``pair[0].check_pos_morph(pos_2)`` /
    ``pair[0].check_pos_phrase(pos_2)`` yield the values collected under it.
    Pairs for which either ``check_pos_morph`` call returns ``None`` are
    skipped, so keys with no collected value never appear in the result.

    Args:
        list_srcs_dst: Iterable of sentences; each sentence is an iterable of
            two-element pairs whose items provide ``check_pos_morph`` and
            ``check_pos_phrase``.
            NOTE(review): presumably ``pair[0]`` is the source chunk and
            ``pair[1]`` the chunk it depends on -- confirm against the caller.
        pos_1: Part of speech looked up on ``pair[1]`` (e.g. verb).
        pos_2: Part of speech looked up on ``pair[0]`` (e.g. particle).

    Returns:
        A list of ``[key, morphs, phrases]`` rows, one per distinct key per
        sentence, in first-seen order. ``morphs`` and ``phrases`` are the
        collected values joined by single spaces, without a leading space.
    """
    pair = []
    for line in list_srcs_dst:
        # Both maps are keyed by the pos_1 morpheme (verb -> particles that
        # attach to the verb) and are reset for every sentence.
        morphs_by_key = defaultdict(list)
        phrases_by_key = defaultdict(list)
        for phrase in line:
            key = phrase[1].check_pos_morph(pos_1)
            if key is None:
                continue
            morph = phrase[0].check_pos_morph(pos_2)
            if morph is None:
                continue
            morphs_by_key[key].append(morph)
            phrases_by_key[key].append(phrase[0].check_pos_phrase(pos_2))

        # Joining once here avoids the leading space produced by repeatedly
        # appending ' ' + item to an initially empty string.
        for key, morphs in morphs_by_key.items():
            pair.append([key, ' '.join(morphs), ' '.join(phrases_by_key[key])])

    return pair


if __name__ == '__main__':
    # Script entry point: parse the CaboCha output, extract the particles
    # ('助詞') grouped under each verb ('動詞'), and write them to knock46.txt
    # as tab-separated rows.
    text = 'neko.txt.cabocha'
    sentences = chunk_list(text)
    list_ = srcs_dst_list(sentences)
    # Compute the rows before opening the file so that a failure during
    # extraction does not leave a truncated output file behind.
    rows = extraction_pos1(list_, '動詞', '助詞')
    # The rows contain Japanese text; an explicit encoding keeps the output
    # independent of the platform's default locale encoding.
    with open('knock46.txt', 'w', encoding='utf-8') as f:
        for key, morphs, phrases in rows:
            f.write(f'{key}\t{morphs}\t{phrases}\n')
コード例 #2
0
ファイル: knock44.py プロジェクト: ise-ab/100knock2018
# -*- coding: utf-8 -*-

from knock41 import chunk_list
from knock42 import srcs_dst_list
from graphviz import Digraph
import pydot


if __name__ == '__main__':
    # Script entry point: draw the pairs of the first 10 entries returned by
    # srcs_dst_list as a graph, once with graphviz and once with pydot.
    text = 'neko.txt.cabocha'
    sentences = chunk_list(text)
    G = Digraph(format='png')
    G.attr('node', shape='circle')
    list_srcs_dst = srcs_dst_list(sentences)
    edge = []
    for i, line in enumerate(list_srcs_dst):
        # Only the first 10 entries are drawn; stop instead of walking the
        # remaining entries without doing anything.
        if i >= 10:
            break
        for pair in line:
            # Compute each surface string once and reuse it for both graphs.
            src = str(pair[0].phrase_surface())
            dst = str(pair[1].phrase_surface())
            edge.append((src, dst))
            G.edge(src, dst)
    G.render('binary_tree_graphviz')
    # Build a directed graph so the pydot picture matches the graphviz
    # Digraph drawn from the same edges.
    g = pydot.graph_from_edges(edge, directed=True)
    # The target file is named *.png, so emit PNG data rather than JPEG.
    g.write_png('binary_tree_pydot.png', prog='dot')

    # for i, (dst, srcs) in enumerate(list_srcs_dst.items()):
    #     if i < 100:
    #         edge.append((str(dst.phrase_surface()), str(srcs.phrase_surface())))
    #         G.edge(str(dst.phrase_surface()), str(srcs.phrase_surface()))
    # G.render('binaryfield_tree_graphviz')
    # g = pydot.graph_from_edges(edge)
    # g.write_jpeg('binary_tree_pydot.png', prog='dot')