Ejemplo n.º 1
0
    #data = {}
    for idx, sentence in enumerate(sentences):
        #print(idx , sentence)
        #data = sentence_to_dot(sentence, data)

        _dots.append(sentence_to_dot(idx, sentence))
    return _dots


def save_graph(dot, file_name):
    """Render a DOT graph description to a JPEG image file.

    Args:
        dot: DOT source handed to ``pydotplus.graph_from_dot_data``.
        file_name: Path of the image file to write.
    """
    # The output is always JPEG (write_jpeg), laid out by the Graphviz
    # ``dot`` program, regardless of the extension of ``file_name``.
    pydotplus.graph_from_dot_data(dot).write_jpeg(file_name, prog='dot')


if __name__ == '__main__':
    # Script entry point: build one DOT description per sentence and write
    # the first one out as 'graph.jpg'.
    # NOTE(review): `list` is presumably a project-level reader imported
    # elsewhere in this file (not visible here); the builtin `list` would
    # merely split the file name into single characters -- confirm.
    chunkeds = list('neko.txt.cabocha')
    paired_sentences = make_paired(chunkeds)
    dots = sentences_to_dots(paired_sentences)

    # range(1) yields only idx == 0, so just the first sentence's graph is
    # saved; widening the range would overwrite the same 'graph.jpg' each time.
    for idx in range(1):
        save_graph(dots[idx], 'graph.jpg')

    #print(dots[0])
    #print(dots[0][0])
    '''
    from graphviz import Digraph

    # formatはpngを指定(他にはPDF, PNG, SVGなどが指定可)
    G = Digraph(format='png')
    G.attr('node', shape='square')
Ejemplo n.º 2
0
from sample46 import sorted_double_list

def sahen_case_frame_patterns(_chunked_sentences):
    """Collect case-frame patterns for verb chunks with a "sahen noun + wo" dependent.

    For every chunk for which ``has_verb()`` is true, the chunks of the same
    sentence whose ``dst`` equals that chunk's ``srcs`` are split into:

    * chunks where ``has_sahen_connection_noun_plus_wo()`` is true, and
    * the remaining chunks where ``has_particle()`` is true.

    A pattern is emitted only when both groups are non-empty.

    Args:
        _chunked_sentences: Iterable of sentences; each sentence is a
            re-iterable collection of chunk objects providing ``has_verb``,
            ``first_verb``, ``dst``, ``srcs``, ``join_morphs``,
            ``has_sahen_connection_noun_plus_wo``, ``has_particle`` and
            ``last_particle``.

    Returns:
        A list with one entry per emitted pattern, shaped as
        ``[predicate, *sorted_double_list(particles, clauses)]`` where
        ``predicate`` is the text of the first "sahen noun + wo" chunk
        followed by the base form of the verb chunk's first verb.
    """
    _sahen_case_frame_patterns = []
    for sentence in _chunked_sentences:
        for _chunk in sentence:
            if not _chunk.has_verb():
                continue

            sahen_connection_noun = []
            clauses = []
            particles = []
            # Single pass over the sentence: each dependent is classified
            # once, and ``clauses``/``particles`` are appended together so
            # the two parallel lists cannot drift out of step.
            # NOTE(review): ``dst`` is compared with ``_chunk.srcs`` as-is;
            # this only matches if ``srcs`` holds the chunk's own index
            # rather than a list of source indices -- confirm against the
            # chunk class.
            for c in sentence:
                if c.dst == _chunk.srcs:
                    if c.has_sahen_connection_noun_plus_wo():
                        sahen_connection_noun.append(c.join_morphs())
                    elif c.has_particle():
                        clauses.append(c.join_morphs())
                        particles.append(c.last_particle().base)

            if sahen_connection_noun and particles:
                # Only the first "sahen noun + wo" chunk forms the predicate.
                predicate = sahen_connection_noun[0] + _chunk.first_verb().base
                _sahen_case_frame_patterns.append(
                    [predicate, *sorted_double_list(particles, clauses)]
                )

    return _sahen_case_frame_patterns


def print_ans(_sahen_case_frame_patterns):
    """Print up to the first ten case-frame patterns, one per line.

    Each pattern is printed as three tab-separated fields: the predicate
    (``case[0]``), the space-joined items of ``case[1]`` and the
    space-joined items of ``case[2]``.

    Args:
        _sahen_case_frame_patterns: Iterable of patterns, each indexable as
            ``[predicate, particles, clauses]`` with the last two being
            iterables of strings.
    """
    ans = [
        case[0] + '\t' + ' '.join(case[1]) + '\t' + ' '.join(case[2])
        for case in _sahen_case_frame_patterns
    ]

    # Slice instead of indexing 0..9 so that fewer than ten patterns print
    # what is available rather than raising IndexError.
    for line in ans[:10]:
        print(line)

if __name__ == '__main__':
    # Read the parsed corpus, extract the case-frame patterns and print the
    # leading ones.
    # NOTE(review): `sample41` is not imported in the visible part of this
    # file -- confirm an `import sample41` exists above.
    print_ans(sahen_case_frame_patterns(sample41.list('neko.txt.cabocha')))