def compute_oracle_derivation(parser, dsg, mapping=id, validation_method="F1"):
    """Return the k-best derivation whose frames agree best with ``dsg``.

    Every derivation yielded by ``parser.k_best_derivation_trees()`` is
    evaluated with ``dog_evaluation``, wrapped in a ``DeepSyntaxGraph`` over
    ``dsg.sentence`` and compared with ``dsg`` on the sets returned by
    ``labeled_frames``. Only frames whose second component is non-empty are
    taken into account (the same guard that ``do_parsing`` uses).

    Args:
        parser: object providing ``k_best_derivation_trees()``, which yields
            pairs whose second element is a derivation.
        dsg: the reference graph; must provide ``sentence`` and
            ``labeled_frames(guard=...)``.
        mapping: callable applied to the evaluated graph before it is wrapped.
            NOTE(review): the default is whatever ``id`` is bound to in this
            module; if that is the builtin it returns an integer, not a
            graph -- confirm that the module rebinds ``id`` to an identity
            function or always pass ``mapping`` explicitly.
        validation_method: the score to maximise, one of ``"F1"``,
            ``"Precision"`` or ``"Recall"``.

    Returns:
        The first derivation that attains the highest score, or ``None`` if
        the parser yields no derivation at all.

    Raises:
        ValueError: if ``validation_method`` is not one of the three
            supported names.
    """
    if validation_method not in ("F1", "Precision", "Recall"):
        raise ValueError(
            "unknown validation method: {!r}".format(validation_method))

    def frame_guard(frame):
        # A plain `frame[1] > 0` raises TypeError on Python 3 when the second
        # component is a sequence; test its length instead.
        return len(frame[1]) > 0

    relevant = dsg.labeled_frames(guard=frame_guard)

    best_der = None
    best_score = -1  # below every attainable score, so the first candidate wins

    for _, derivation in parser.k_best_derivation_trees():
        dog, sync = dog_evaluation(derivation)
        candidate = DeepSyntaxGraph(dsg.sentence, mapping(dog), sync)
        retrieved = candidate.labeled_frames(guard=frame_guard)

        n_common = float(len(retrieved & relevant))

        # A candidate without any retrieved frame could be rated with
        # precision 1 (nothing wrong) or 0 (nothing right); 0 is used here.
        precision = n_common / len(retrieved) if retrieved else 0
        recall = n_common / len(relevant) if relevant else 0
        if precision + recall > 0:
            fmeasure = 2.0 * precision * recall / (precision + recall)
        else:
            fmeasure = 0

        score = {
            "F1": fmeasure,
            "Precision": precision,
            "Recall": recall,
        }[validation_method]

        # Strict comparison keeps the earliest derivation among ties.
        if score > best_score:
            best_der, best_score = derivation, score

    return best_der
Example #2
0
def build_dummy_dsg(sentence, label):
    """Create a DeepSyntaxGraph with one isolated terminal per token.

    For the token at position ``i`` a node ``i`` is added together with a
    terminal edge that has no inputs and is labelled with the token's first
    three components followed by ``'_'``. Position ``i`` is synchronized
    with exactly node ``i``.

    Args:
        sentence: iterable of indexable tokens with at least three components.
        label: passed on as the ``label`` of the resulting graph.

    Returns:
        The ``DeepSyntaxGraph`` built over ``sentence``.
    """
    graph = DirectedOrderedGraph()
    synchronization = []

    for position, token in enumerate(sentence):
        graph.add_node(position)
        terminal = (token[0], token[1], token[2], '_')
        graph.add_terminal_edge([], terminal, position)
        synchronization.append([position])

    return DeepSyntaxGraph(sentence, graph, synchronization, label=label)
Example #3
0
def generate_sdg(n_nodes,
                 maximum_inputs=4,
                 upward_closed=False,
                 new_output=0.1,
                 multiple_output=0.0):
    """Wrap a graph obtained from ``generate`` in a DeepSyntaxGraph.

    All five arguments are forwarded positionally and unchanged to
    ``generate``. The sentence of the result consists of the incoming edge
    of each node (in the order of ``dog.nodes``), and every sentence
    position is synchronized with exactly the corresponding node.

    Returns:
        The ``DeepSyntaxGraph`` built around the generated graph.
    """
    graph = generate(n_nodes, maximum_inputs, upward_closed, new_output,
                     multiple_output)
    incoming_edges = [graph.incoming_edge(n) for n in graph.nodes]
    synchronization = [[n] for n in graph.nodes]
    return DeepSyntaxGraph(incoming_edges, graph, synchronization)
def worker(parser, graph, return_dict):
    """Run ``parser`` and, on success, store the parsed graph in ``return_dict[0]``.

    If the parser does not recognize its input, ``return_dict`` is left
    untouched. Otherwise the best derivation is evaluated with
    ``dog_evaluation`` and wrapped in a ``DeepSyntaxGraph`` that reuses the
    sentence and label of ``graph``.

    Args:
        parser: object providing ``parse()``, ``recognized()`` and
            ``best_derivation_tree()``.
        graph: source of the ``sentence`` and ``label`` for the result.
        return_dict: mutable mapping that receives the result under key ``0``.
    """
    parser.parse()
    if not parser.recognized():
        return

    best = parser.best_derivation_tree()
    assert best is not None

    parsed_dog, synchronization = dog_evaluation(best)
    return_dict[0] = DeepSyntaxGraph(graph.sentence,
                                     parsed_dog,
                                     synchronization,
                                     label=graph.label)
    def parsing_postprocess(self, sentence, derivation, label=None):
        """Turn ``derivation`` into a DeepSyntaxGraph over ``sentence``.

        The derivation is evaluated with ``dog_evaluation`` (``compress=False``).
        If ``self.induction_settings.binarize`` is set, the graph is
        debinarized first. A graph that is not output connected is counted in
        ``self.statistics.not_output_connected`` and, when ``self.interactive``
        is set, handed to ``render_and_view_dog`` under the name
        ``"parsed_" + str(label)``.

        Returns:
            ``DeepSyntaxGraph(sentence, dog, sync)``; ``label`` is only used
            for the name of the rendered graph.
        """
        graph, synchronization = dog_evaluation(derivation, compress=False)

        settings = self.induction_settings
        if settings.binarize:
            graph = graph.debinarize(is_bin=settings.is_bin)

        connected = graph.output_connected()
        if not connected:
            self.statistics.not_output_connected += 1
            if self.interactive:
                viewer = render_and_view_dog(graph, "parsed_" + str(label))
                # viewer.communicate()

        return DeepSyntaxGraph(sentence, graph, synchronization)
Example #6
0
def parse_sentence(lines, label=None):
    """Build a DeepSyntaxGraph from the token lines of one sentence.

    Every line is split on whitespace and must have at least seven columns:

    0. integer node index
    1. form
    2. lemma
    3. part of speech
    4. ``'+'`` if the node is added to the outputs of the graph
    5. ``'+'`` if the token is a predicate
    6. frame
    7. onwards: one argument column per predicate; ``'_'`` means "no argument"

    Non-predicate tokens get a terminal edge without inputs right away.
    Predicates get their terminal edge after all lines have been read: the
    k-th predicate (in order of appearance) receives as inputs all
    ``(index, argument)`` pairs collected from the k-th argument column, and
    the i-th input is assigned the i-th argument as its function.

    Args:
        lines: iterable of strings, one token per line.
        label: passed on as the ``label`` of the resulting graph.

    Returns:
        A ``DeepSyntaxGraph`` whose sentence is the list of
        ``(form, lemma, pos)`` triples and whose synchronization maps each
        position to the single node of that token.

    Raises:
        AssertionError: if a line has fewer than seven columns.
        ValueError: if the first column of a line is not an integer.
    """
    dog = DirectedOrderedGraph()
    arguments = defaultdict(list)
    predicates = {}
    predicate_list = []
    sentence = []
    synchronization = []

    for line in lines:
        contents = line.split()
        assert len(contents) >= 7
        idx = int(contents[0])
        dog.add_node(idx)
        form, lemma, pos = contents[1:4]
        frame = contents[6]

        # Compare by value: identity tests against string literals only work
        # by accident of interning.
        if contents[4] == '+':
            dog.add_to_outputs(idx)

        if contents[5] == '+':
            # The edge of a predicate needs the complete argument columns,
            # which are only known once every line has been read.
            predicates[idx] = (form, lemma, pos, frame)
            predicate_list.append(idx)
        else:
            dog.add_terminal_edge([], (form, lemma, pos, frame), idx)

        sentence.append((form, lemma, pos))
        synchronization.append([idx])
        for column, arg in enumerate(contents[7:]):
            if arg != '_':
                arguments[column].append((idx, arg))

    for idx in predicates:
        # Position of the predicate among all predicates == its argument column.
        inputs = arguments[predicate_list.index(idx)]
        edge = dog.add_terminal_edge(inputs, predicates[idx], idx)
        for i, (_, function) in enumerate(inputs):
            edge.set_function(i, function)

    return DeepSyntaxGraph(sentence, dog, synchronization, label=label)
 def parsing_postprocess(self, sentence, derivation, label=None):
     """Evaluate ``derivation`` and wrap the result in a DeepSyntaxGraph.

     ``derivation`` must not be ``None``. The graph and synchronization
     returned by ``dog_evaluation`` are combined with ``sentence`` and
     ``label`` into the returned ``DeepSyntaxGraph``.
     """
     assert derivation is not None
     evaluated = dog_evaluation(derivation)
     graph, synchronization = evaluated
     return DeepSyntaxGraph(sentence, graph, synchronization, label=label)
def _print_scores(title, scores):
    """Print ``title`` followed by precision, recall, F1 and exact match of ``scores``."""
    print(title)
    print("P", scores.precision(), "R", scores.recall(), "F1",
          scores.fmeasure(), "EM", scores.exact_match())


def do_parsing(parser,
               test_dsgs,
               term_labeling_token,
               oracle=False,
               debinarize=id,
               interactive=True):
    """Parse every graph in ``test_dsgs`` and score the results against it.

    For each graph the parser input is prepared from its sentence by
    ``term_labeling_token.prepare_parser_input``, the parser is run, and the
    labels of the reference graph are projected in place (a
    ``ConstituentTerminal`` is replaced by its ``pos()``, anything else is
    kept). A recognized input is scored with
    ``scorer.add_accuracy_frames``, an unrecognized one with
    ``scorer.add_failure``; in both cases only frames whose second component
    is non-empty are considered. A summary is printed at the end.

    Args:
        parser: object providing ``set_input``, ``parse``, ``recognized``,
            ``best_derivation_tree`` and ``clear`` (plus whatever
            ``compute_oracle_derivation`` needs when ``oracle`` is set).
        test_dsgs: iterable of reference graphs; each is modified in place by
            the label projection.
        term_labeling_token: object providing ``prepare_parser_input``.
        oracle: if true, the derivation is chosen by
            ``compute_oracle_derivation`` instead of taking the parser's best
            derivation.
        debinarize: callable applied to the parsed graph before scoring; also
            passed as ``mapping`` to ``compute_oracle_derivation``.
            NOTE(review): the default is whatever ``id`` is bound to in this
            module -- confirm it is an identity function, not the builtin.
        interactive: if true, every parsed graph that is not output connected
            is handed to ``render_and_view_dog``.

    Returns:
        The ``PredicateArgumentScoring`` instance holding the accumulated
        scores.
    """

    def project_label(token):
        return token.pos() if isinstance(token, ConstituentTerminal) else token

    def frame_guard(frame):
        return len(frame[1]) > 0

    scorer = PredicateArgumentScoring()
    not_output_connected = 0

    start = time.time()
    for dsg in test_dsgs:
        parser.set_input(term_labeling_token.prepare_parser_input(
            dsg.sentence))
        parser.parse()

        dsg.dog.project_labels(project_label)

        if parser.recognized():
            if oracle:
                derivation = compute_oracle_derivation(parser, dsg, debinarize)
            else:
                derivation = parser.best_derivation_tree()
            dog, sync = dog_evaluation(derivation)

            # Connectivity is checked on the graph as parsed, i.e. before
            # `debinarize` is applied.
            if not dog.output_connected():
                not_output_connected += 1
                if interactive:
                    # str() keeps this working for labels that are not strings.
                    render_and_view_dog(dog, "parsed_" + str(dsg.label))

            dsg2 = DeepSyntaxGraph(dsg.sentence, debinarize(dog), sync)

            scorer.add_accuracy_frames(
                dsg.labeled_frames(guard=frame_guard),
                dsg2.labeled_frames(guard=frame_guard))
        else:
            scorer.add_failure(dsg.labeled_frames(guard=frame_guard))

        parser.clear()

    print("Completed parsing in", time.time() - start, "seconds.")
    print("Parse failures:", scorer.labeled_frame_scorer.n_failures())
    print("Not output connected", not_output_connected)
    _print_scores("Labeled frames:", scorer.labeled_frame_scorer)
    _print_scores("Unlabeled frames:", scorer.unlabeled_frame_scorer)
    _print_scores("Labeled dependencies:", scorer.labeled_dependency_scorer)
    _print_scores("Unlabeled dependencies:",
                  scorer.unlabeled_dependency_scorer)
    return scorer