Esempio n. 1
0
from datetime import datetime
from nltk.parse.corenlp import CoreNLPDependencyParser
from nltk.parse.dependencygraph import DependencyGraph

# Client for a CoreNLP server; the server must already be reachable at this URL.
parser = CoreNLPDependencyParser(url='http://localhost:9000')

# Sentence to parse. The alternatives below are kept for quick switching;
# only the last (uncommented) assignment is used.
# sentence = "The trophy would not fit in the brown suitcase because it was too big"
# sentence = "I spread the roth on the table in order to protect it"
# sentence = "On the table I've spread the roth in order to protect it"
# sentence = "The city councilmen refused the demonstrators a permit because they feared violence"
# sentence = "She said he told her their secrets"
# sentence = "The monkey said the bird told the elephant he was dangerous."
# sentence = "The women stopped taking the pills because they were carcinogenic."
sentence = "Marta has a cat, her cat is brown"

# The single-target unpacking raises ValueError unless exactly one parse
# comes back.
parse, = parser.raw_parse(sentence)
conll = parse.to_conll(4)
print(conll)

# Rebuild a graph from the 4-column CoNLL text and derive the other views.
dg = DependencyGraph(conll)
dotted = dg.to_dot()
G = dg.nx_graph()

# Render first so that a rendering failure does not leave an empty file
# behind, then write through a context manager so the handle is always closed.
# NOTE(review): _repr_svg_ is a non-public display hook of DependencyGraph.
svg = dg._repr_svg_()
with open('hoy_' + str(datetime.now()) + '.svg', 'w') as f:
    f.write(svg)
Esempio n. 2
0
from datetime import datetime
from nltk.parse.corenlp import CoreNLPDependencyParser
from nltk.parse.dependencygraph import DependencyGraph

# Client for a CoreNLP server; the server must already be reachable at this URL.
parser = CoreNLPDependencyParser(url='http://localhost:9000')

# filename = "text6"
# f = open("../Fragments_for_testing/"+filename, "r")
# sentences = f.readlines()
# for sentence in sentences:
sentence = "Elephants are big. Monkeys are small"

# The single-target unpacking raises ValueError unless exactly one parse
# comes back.
parse, = parser.raw_parse(sentence)
conll = parse.to_conll(4)

# Rebuild a graph from the 4-column CoNLL text and derive the other views.
dp = DependencyGraph(conll)
dotted = dp.to_dot()
G = dp.nx_graph()

# Render first so that a rendering failure does not leave an empty file
# behind, then write through a context manager so the handle is always closed.
# NOTE(review): _repr_svg_ is a non-public display hook of DependencyGraph.
svg = dp._repr_svg_()
with open('test_' + str(datetime.now()) + '.svg', 'w') as f:
    f.write(svg)
def _conllu_row(token_id, sent, stags_sent, pos_sent, head, rel):
    """Return one tab-separated 10-column row for the 1-based *token_id*."""
    word = sent[token_id - 1]
    stag = stags_sent[token_id - 1]
    return '\t'.join([
        str(token_id), word + '_' + stag, '_', stag, pos_sent[token_id - 1],
        '_', str(head), rel, '_', '_'
    ])


def output_conllu(filename, sents, pos, stags, arcs, rels, dependencies,
                  new_edges, output_dir, result_file,
                  sent_indices=(21,), start=25, end=33):
    """Write one Graphviz ``.gv`` file per dependency of the selected sentences.

    For every entry of ``dependencies[sent_idx]`` a small 10-column graph is
    assembled from (a) that dependency itself, (b) the tokens inside the
    ``start``..``end`` window and (c) the entries of ``new_edges[sent_idx]``
    whose token id lies in the window. The graph is rendered with
    ``DependencyGraph.to_dot()``; the edge of the dependency is then recoloured
    red (and written with its endpoints swapped), and edges that come from
    ``new_edges`` are recoloured blue.

    Args:
        filename: Unused by this function; kept for interface compatibility.
        sents, pos, stags, arcs, rels: Per-sentence sequences, indexed first by
            sentence and then by 0-based token position (words, POS tags,
            supertags, head ids and relation labels respectively).
        dependencies: Per-sentence sequences of entries where ``dep[0]`` is
            written to the head column, ``dep[1]`` is the 1-based token id and
            ``dep[2]`` the relation label.
        new_edges: Per-sentence sequences of entries where ``edge[0]`` is the
            1-based token id, ``edge[1]`` is written to the head column and
            ``edge[2]`` is the relation label.
        output_dir: Directory for the ``.gv`` files; created if missing.
        result_file: Text file whose lines hold three whitespace-separated
            integers: sentence index, dependency index, score. The score is
            embedded in the output file name.
        sent_indices: Sentence indices to process. The default reproduces the
            previously hard-coded selection.
        start, end: Inclusive window bounds. The defaults reproduce the
            previously hard-coded values.

    Raises:
        KeyError: If ``result_file`` has no score for a processed
            (sentence index, dependency index) pair.
    """
    scores = {}
    with open(result_file) as fin:
        for line in fin:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            scores[(int(fields[0]), int(fields[1]))] = int(fields[2])

    # NOTE(review): the two dictionaries below are never read in this function;
    # the calls are kept because the helpers are defined elsewhere and may have
    # effects this function relies on — confirm before removing.
    tree_prop_file = 'd6.treeproperties'
    t2props_dict = get_t2props_dict(tree_prop_file)
    t2topsub_dict = get_t2topsub_dict(tree_prop_file)

    edge_template = '{} -> {} [label="{}"]'

    for sent_idx in sent_indices:
        deps_sent = dependencies[sent_idx]
        for dep_idx, unbounded_dep in enumerate(deps_sent):
            sent = sents[sent_idx]
            pos_sent = pos[sent_idx]
            stags_sent = stags[sent_idx]
            arcs_sent = arcs[sent_idx]
            rels_sent = rels[sent_idx]
            sent_new_edges = new_edges[sent_idx]

            # The dependency under inspection always comes first.
            rows = [
                _conllu_row(int(unbounded_dep[1]), sent, stags_sent, pos_sent,
                            unbounded_dep[0], unbounded_dep[2])
            ]

            # Tokens whose 0-based position lies inside [start, end].
            # NOTE(review): here the window is applied to 0-based positions,
            # while for new edges below it is applied to 1-based ids — confirm
            # that this asymmetry is intended.
            first = max(start, 0)
            last = min(end, len(sent) - 1)
            for position in range(first, last + 1):
                rows.append(
                    _conllu_row(position + 1, sent, stags_sent, pos_sent,
                                arcs_sent[position], rels_sent[position]))

            # New edges whose token id lies inside [start, end]. The id is
            # converted with int() before comparing, as everywhere else, so
            # string ids do not raise TypeError on Python 3.
            for edge in sent_new_edges:
                token_id = int(edge[0])
                if start <= token_id <= end:
                    rows.append(
                        _conllu_row(token_id, sent, stags_sent, pos_sent,
                                    edge[1], edge[2]))

            conllu = '\n'.join(rows) + '\n'
            graph = DependencyGraph(conllu)

            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)
            output_file = os.path.join(
                output_dir,
                'sent{}_dep{}_correct{}.gv'.format(
                    sent_idx, dep_idx, scores[(sent_idx, dep_idx)]))

            dot_string = graph.to_dot()

            ## add colors
            # Dot lines exactly as to_dot() is expected to emit them for the
            # edges that get recoloured.
            red_source = edge_template.format(
                unbounded_dep[0], unbounded_dep[1], unbounded_dep[2])
            red_target = '{} -> {} [label="{}", color="red"]'.format(
                unbounded_dep[1], unbounded_dep[0], unbounded_dep[2])
            blue_sources = {
                edge_template.format(edge[1], edge[0], edge[2])
                for edge in sent_new_edges
            }

            out_lines = []
            for line in dot_string.split('\n'):
                line = line.strip()
                if line == red_source:
                    line = red_target
                elif line in blue_sources:
                    # Drop the closing ']' and re-close after the colour.
                    line = line[:-1] + ', color="blue"]'
                out_lines.append(line + '\n')

            with open(output_file, 'wt') as fout:
                fout.write(''.join(out_lines))