def test_subgrouping(self):
     """Check the subgrouped recursive partitioning of TIGER sentence s4.

     Loads sentence ``s4`` from ``res/tiger/tiger_8000.xml``, projects
     terminal labels to their ``pos()`` value, renders the graph, prints the
     sentence forms, the synchronization and several partitionings, and then
     asserts the exact shape of the subgrouped partitioning both before and
     after ``fanout_limited_partitioning(..., 1)``.
     """
     first, last = 4, 4
     skipped = []
     corpus_path = "res/tiger/tiger_8000.xml"
     sentence_names = ['s' + str(number)
                       for number in range(first, last + 1)
                       if number not in skipped]
     graphs = sentence_names_to_deep_syntax_graphs(
         sentence_names, corpus_path, hold=False, reorder_children=True)

     def pos_label(token):
         # Terminals are replaced by their pos(); other labels pass through.
         if isinstance(token, ConstituentTerminal):
             return token.pos()
         return token

     def leaf(position):
         # Partition node covering a single position, without children.
         return ({position}, [])

     def pair(left, right):
         # Partition node covering two positions, split into two leaves.
         return ({left, right}, [leaf(left), leaf(right)])

     expected_subgrouped = (
         set(range(12)),
         [
             ({0, 1, 2, 3, 4}, [leaf(0), leaf(1), leaf(2), pair(3, 4)]),
             leaf(5),
             ({6, 7, 8, 9, 10, 11},
              [({6, 7, 8, 9, 10},
                [({6, 7, 8}, [leaf(6), leaf(7), leaf(8)]), pair(9, 10)]),
               leaf(11)]),
         ],
     )
     expected_fanout_limited = (
         set(range(12)),
         [
             ({0, 1, 2, 3, 4, 5},
              [({0, 1, 2, 3, 4},
                [({0, 1, 2}, [pair(0, 1), leaf(2)]), pair(3, 4)]),
               leaf(5)]),
             ({6, 7, 8, 9, 10, 11},
              [({6, 7, 8, 9, 10},
                [({6, 7, 8}, [pair(6, 7), leaf(8)]), pair(9, 10)]),
               leaf(11)]),
         ],
     )

     for dsg in graphs:
         dsg.dog.project_labels(pos_label)
         render_and_view_dog(dsg.dog, "tigerdsg4", "/tmp/")
         print([token.form() for token in dsg.sentence])
         print(dsg.synchronization)
         # Each partitioning is recomputed for every use, exactly as before;
         # whether the helpers mutate their argument is not visible here.
         print(dsg.recursive_partitioning())
         print(fanout_limited_partitioning(dsg.recursive_partitioning(), 1))
         print(dsg.recursive_partitioning(subgrouping=True))
         print(fanout_limited_partitioning(
             dsg.recursive_partitioning(subgrouping=True), 1))
         self.assertTupleEqual(
             expected_subgrouped,
             dsg.recursive_partitioning(subgrouping=True))
         self.assertTupleEqual(
             expected_fanout_limited,
             fanout_limited_partitioning(
                 dsg.recursive_partitioning(subgrouping=True), 1))
    def test_dot_export(self):
        """Export the graph of TIGER sentence s26954 to dot and render it.

        Terminal labels are first projected to their ``form()``; the dot
        output is printed and the graph is passed to ``render_and_view_dog``.
        """
        graphs = sentence_names_to_deep_syntax_graphs(
            ["s26954"], "res/tiger/tiger_s26954.xml", hold=False)
        dsg = graphs[0]

        def form_label(token):
            # Terminals are replaced by their form(); other labels pass through.
            if isinstance(token, ConstituentTerminal):
                return token.form()
            return token

        dsg.dog.project_labels(form_label)

        print(dsg.dog.export_dot("s26954"))

        render_and_view_dog(dsg.dog, "foo")
Beispiel #3
0
    def __process_single_dsg(self, i, dsg, rec_part_strat, terminal_labeling):
        """Round-trip one deep syntax graph through induction and parsing.

        A grammar is induced from ``dsg`` alone, the labeled sentence of
        ``dsg`` is parsed with it, and the graph evaluated from the best
        derivation is asserted to equal ``dsg.dog`` (after projecting the
        labels of ``dsg.dog`` with ``terminal_labeling``).

        Note that ``dsg.dog`` is modified in place by ``project_labels``.

        :param i: index of the graph; only used in the progress printout
        :param dsg: the deep syntax graph under test
        :param rec_part_strat: callable mapping ``dsg`` to a recursive partitioning
        :param terminal_labeling: callable applied to every sentence token and
            used to project the labels of ``dsg.dog``
        :raises AssertionError: if the sentence is not recognized, no
            derivation is found, or the parsed graph differs from ``dsg.dog``
        """
        print(i, dsg, dsg.label)

        rec_part = rec_part_strat(dsg)
        decomp = compute_decomposition(dsg, rec_part)

        grammar = induce_grammar_from(dsg,
                                      rec_part,
                                      decomp,
                                      terminal_labeling=terminal_labeling,
                                      enforce_outputs=False,
                                      normalize=True)

        parser = LCFRS_parser(grammar)
        parser.set_input([terminal_labeling(token) for token in dsg.sentence])

        print("parsing")

        parser.parse()
        self.assertTrue(parser.recognized())

        derivation = parser.best_derivation_tree()
        self.assertIsNotNone(derivation)

        # Only the graph is compared; the synchronization is not needed here.
        dog, _ = dog_evaluation(derivation)

        # The grammar was induced with projected labels, so the corpus graph
        # has to be projected the same way before the comparison.
        dsg.dog.project_labels(terminal_labeling)

        print("comparing")

        self.assertEqual(dog, dsg.dog)
Beispiel #4
0
 def test_dog_generation(self):
     """Round-trip randomly generated graphs for every partitioning strategy.

     For each strategy in ``self.rec_part_strategies`` fifty random graphs
     are generated and checked with ``__process_single_dsg``. Cyclic graphs
     are skipped for ``extract_recursive_partitioning``. When a check fails,
     the offending graph is rendered and the original failure is re-raised.
     """
     for rec_part_strat in self.rec_part_strategies:
         for i in range(50):
             dsg = generate_sdg(randint(2, 12), maximum_inputs=3)
             if (rec_part_strat == extract_recursive_partitioning
                     and dsg.dog.cyclic()):
                 continue
             try:
                 self.__process_single_dsg(i,
                                           dsg,
                                           rec_part_strat,
                                           terminal_labeling=str)
             except AssertionError:
                 render_and_view_dog(dsg.dog, 'random_dog_' + str(i))
                 # Re-raise the original failure instead of running the check
                 # a second time: the first run may already have projected
                 # the labels of dsg.dog in place, so a re-run is not
                 # guaranteed to reproduce (or even hit) the same assertion.
                 raise
    def parsing_postprocess(self, sentence, derivation, label=None):
        """Build a ``DeepSyntaxGraph`` for ``sentence`` from a derivation.

        The derivation is evaluated with ``dog_evaluation(compress=False)``.
        If ``self.induction_settings.binarize`` is set, the resulting graph is
        debinarized. Graphs that are not output connected are counted in
        ``self.statistics.not_output_connected`` and, in interactive mode,
        rendered under the name ``"parsed_" + str(label)``.

        :param sentence: sentence the returned graph is paired with
        :param derivation: derivation to evaluate
        :param label: only used to name the rendering in interactive mode
        :return: ``DeepSyntaxGraph(sentence, dog, sync)``
        """
        dog, sync = dog_evaluation(derivation, compress=False)

        if self.induction_settings.binarize:
            dog = dog.debinarize(is_bin=self.induction_settings.is_bin)

        disconnected = not dog.output_connected()
        if disconnected:
            self.statistics.not_output_connected += 1
        if disconnected and self.interactive:
            render_and_view_dog(dog, "parsed_" + str(label))

        return DeepSyntaxGraph(sentence, dog, sync)
 def test_dog_generation(self):
     """Generate ten random graphs and render each of them."""
     for index in range(10):
         size = randint(2, 12)
         random_dog = generate(size,
                               maximum_inputs=4,
                               new_output=0.4,
                               upward_closed=True)
         render_and_view_dog(random_dog, 'random_dog_' + str(index))
    def test_induction_on_a_corpus(self):
        """Induce a grammar from TIGER sentences s1..s50 and re-parse them.

        The grammar is induced with the first ``'cfg'`` partitioning strategy,
        made proper and handed to a ``CFGParser``. Every training sentence
        must be recognized; the graph evaluated from its best derivation is
        scored against the corpus graph (labels projected to POS), and
        precision / recall / F1 for labeled frames and labeled dependencies
        are printed at the end.

        Set the local ``interactive`` flag to render corpus and parsed graph
        for every sentence where they differ.
        """
        interactive = False
        start = 1
        stop = 50
        path = "res/tiger/tiger_release_aug07.corrected.16012013.utf8.xml"
        exclude = []
        dsgs = sentence_names_to_deep_syntax_graphs(
            ['s' + str(i) for i in range(start, stop + 1) if i not in exclude],
            path,
            hold=False)

        rec_part_strategy = the_recursive_partitioning_factory().get_partitioning('cfg')[0]

        def pos_or_identity(label):
            # A ConstituentTerminal is replaced by its pos(); anything else
            # is passed through unchanged.
            if isinstance(label, ConstituentTerminal):
                return label.pos()
            return label

        def label_edge(edge):
            return pos_or_identity(edge.label)

        def nonterminal_labeling(nodes, dsg):
            return simple_labeling(nodes, dsg, label_edge)

        term_labeling_token = PosTerminals()

        def term_labeling(token):
            if isinstance(token, ConstituentTerminal):
                return term_labeling_token.token_label(token)
            return token

        def second_component_nonempty(frame):
            # Guard for labeled_frames(): keep entries whose item at index 1
            # is non-empty.
            return len(frame[1]) > 0

        grammar = induction_on_a_corpus(dsgs, rec_part_strategy, nonterminal_labeling, term_labeling, normalize=True)
        grammar.make_proper()

        parser = CFGParser(grammar)

        scorer = PredicateArgumentScoring()

        for dsg in dsgs:
            parser.set_input(term_labeling_token.prepare_parser_input(dsg.sentence))
            parser.parse()
            self.assertTrue(parser.recognized())
            derivation = parser.best_derivation_tree()
            dog, sync = dog_evaluation(derivation)
            dsg2 = DeepSyntaxGraph(dsg.sentence, dog, sync)

            # Project the corpus graph in place before it is scored against
            # the parsed graph.
            dsg.dog.project_labels(pos_or_identity)
            parser.clear()

            scorer.add_accuracy_frames(
                dsg.labeled_frames(guard=second_component_nonempty),
                dsg2.labeled_frames(guard=second_component_nonempty)
            )

            if interactive and dsg.dog != dog:
                z1 = render_and_view_dog(dsg.dog, "corpus_" + dsg.label)
                z2 = render_and_view_dog(dog, "parsed_" + dsg.label)
                z1.communicate()
                z2.communicate()

        print("Labeled frames:")
        print("P", scorer.labeled_frame_scorer.precision(), "R", scorer.labeled_frame_scorer.recall(),
              "F1", scorer.labeled_frame_scorer.fmeasure())
        print("Labeled dependencies:")
        print("P", scorer.labeled_dependency_scorer.precision(), "R", scorer.labeled_dependency_scorer.recall(),
              "F1", scorer.labeled_dependency_scorer.fmeasure())
def do_parsing(parser,
               test_dsgs,
               term_labeling_token,
               oracle=False,
               debinarize=id):
    """Parse every graph in ``test_dsgs`` and score the results.

    For each test graph the sentence is prepared with
    ``term_labeling_token.prepare_parser_input`` and parsed. The labels of
    the corpus graph are projected to POS in place. If the sentence is
    recognized, the graph evaluated from the chosen derivation is scored
    against the corpus graph; otherwise a failure is recorded. Timing, the
    number of parse failures, the number of parsed graphs that are not output
    connected and P/R/F1/EM for labeled and unlabeled frames and dependencies
    are printed.

    :param parser: parser offering ``set_input``, ``parse``, ``recognized``,
        ``best_derivation_tree`` and ``clear``
    :param test_dsgs: iterable of deep syntax graphs to parse
    :param term_labeling_token: object offering ``prepare_parser_input``
    :param oracle: if true, the derivation is chosen by
        ``compute_oracle_derivation`` instead of ``best_derivation_tree``
    :param debinarize: callable applied to the parsed graph before scoring
        (and passed on to ``compute_oracle_derivation``).
        NOTE(review): the default is whatever ``id`` resolves to in this
        module. If that is Python's builtin ``id()``, the default would
        replace the graph by an integer -- confirm that the module provides
        an identity function under this name, or always pass ``debinarize``
        explicitly.
    :return: the ``PredicateArgumentScoring`` instance holding all scores
    """
    interactive = True  # False

    scorer = PredicateArgumentScoring()

    not_output_connected = 0

    def pos_or_identity(token):
        # A ConstituentTerminal is replaced by its pos(); anything else is
        # passed through unchanged.
        if isinstance(token, ConstituentTerminal):
            return token.pos()
        return token

    def second_component_nonempty(frame):
        # Guard for labeled_frames(): keep entries whose item at index 1 is
        # non-empty.
        return len(frame[1]) > 0

    start = time.time()
    for dsg in test_dsgs:
        parser.set_input(term_labeling_token.prepare_parser_input(
            dsg.sentence))
        parser.parse()

        dsg.dog.project_labels(pos_or_identity)

        if parser.recognized():
            if oracle:
                derivation = compute_oracle_derivation(parser, dsg, debinarize)
            else:
                derivation = parser.best_derivation_tree()
            dog, sync = dog_evaluation(derivation)

            if not dog.output_connected():
                not_output_connected += 1
                if interactive:
                    render_and_view_dog(dog, "parsed_" + dsg.label)

            dsg2 = DeepSyntaxGraph(dsg.sentence, debinarize(dog), sync)

            scorer.add_accuracy_frames(
                dsg.labeled_frames(guard=second_component_nonempty),
                dsg2.labeled_frames(guard=second_component_nonempty))
        else:
            scorer.add_failure(
                dsg.labeled_frames(guard=second_component_nonempty))

        parser.clear()

    print("Completed parsing in", time.time() - start, "seconds.")
    print("Parse failures:", scorer.labeled_frame_scorer.n_failures())
    print("Not output connected", not_output_connected)

    # The four score reports share one layout; the sub-scorer is looked up by
    # name right before it is printed.
    reports = (
        ("Labeled frames:", "labeled_frame_scorer"),
        ("Unlabeled frames:", "unlabeled_frame_scorer"),
        ("Labeled dependencies:", "labeled_dependency_scorer"),
        ("Unlabeled dependencies:", "unlabeled_dependency_scorer"),
    )
    for title, attribute in reports:
        print(title)
        part = getattr(scorer, attribute)
        print("P", part.precision(), "R", part.recall(),
              "F1", part.fmeasure(), "EM", part.exact_match())
    return scorer