Code example #1
File: eval.py  Project: UFAL-DSG/tgen
    def process_eval_doc(self, eval_doc, gen_trees, language, ref_selector, target_selector):
        """Evaluate generated trees against a reference document; save per-tree statistics
        in the reference document and print out global statistics.

        Does not reset statistics at the beginning (must be reset manually if needed).

        @param eval_doc: reference t-tree document
        @param gen_trees: a list of generated TreeData objects
        @param language: language for the reference document
        @param ref_selector: selector for reference trees in the reference document
        @param target_selector: selector for generated trees (used to save statistics)
        """
        log_info('Evaluating...')
        for eval_bundle, gen_tree in zip(eval_doc.bundles, gen_trees):
            # add some stats about the tree directly into the output file
            eval_ttree = eval_bundle.get_zone(language, ref_selector).ttree
            gen_ttree = TreeNode(gen_tree)
            add_bundle_text(eval_bundle, language, target_selector + 'Xscore',
                            "P: %.4f R: %.4f F1: %.4f" %
                            p_r_f1_from_counts(*corr_pred_gold(eval_ttree, gen_ttree)))
            # collect overall stats
            # TODO maybe add cost somehow?
            self.append(eval_ttree, gen_ttree)

        # print out the overall stats
        log_info("NODE precision: %.4f, Recall: %.4f, F1: %.4f" % self.p_r_f1())
        log_info("DEP  precision: %.4f, Recall: %.4f, F1: %.4f" % self.p_r_f1(EvalTypes.DEP))
        log_info("Tree size stats:\n * GOLD %s\n * PRED %s\n * DIFF %s" % self.size_stats())
        log_info("Score stats:\n * GOLD %s\n * PRED %s\n * DIFF %s" % self.score_stats())
        log_info("Common subtree stats:\n -- SIZE: %s\n -- ΔGLD: %s\n -- ΔPRD: %s" %
                 self.common_substruct_stats())
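
A minimal usage sketch for this method, assuming it sits on tgen's Evaluator class (as the self.append() / self.p_r_f1() calls suggest); the import paths, file names and the generation step below are illustrative assumptions, not part of the example above.

# Hypothetical driver for process_eval_doc(); import paths and file names
# are assumptions and may need adjusting to the actual project layout.
from tgen.eval import Evaluator        # assumed module for Evaluator
from tgen.futil import read_ttrees     # assumed module for read_ttrees

evaler = Evaluator()
eval_doc = read_ttrees('test-ref.yaml.gz')       # reference t-tree document (placeholder name)
gen_trees = my_planner.generate_all(eval_doc)    # hypothetical call returning a list of TreeData
evaler.process_eval_doc(eval_doc, gen_trees,
                        language='en',           # language of the reference zones
                        ref_selector='',         # selector of the reference trees
                        target_selector='tgen')  # selector under which per-tree scores are saved
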
Code example #2
def eval_trees(das, eval_ttrees, gen_ttrees, eval_doc, language, selector):
    """Evaluate generated trees and print out statistics."""

    log_info('Evaluating...')
    evaler = Evaluator()
    for eval_bundle, eval_ttree, gen_ttree, da in zip(eval_doc.bundles, eval_ttrees, gen_ttrees, das):
        # add some stats about the tree directly into the output file
        add_bundle_text(eval_bundle, language, selector + 'Xscore',
                        "P: %.4f R: %.4f F1: %.4f" % p_r_f1_from_counts(*corr_pred_gold(eval_ttree, gen_ttree)))

        # collect overall stats
        # TODO maybe add cost ??
        evaler.append(eval_ttree, gen_ttree)
    # print overall stats
    log_info("NODE precision: %.4f, Recall: %.4f, F1: %.4f" % evaler.p_r_f1())
    log_info("DEP  precision: %.4f, Recall: %.4f, F1: %.4f" % evaler.p_r_f1(EvalTypes.DEP))
    log_info("Tree size stats:\n * GOLD %s\n * PRED %s\n * DIFF %s" % evaler.size_stats())
    log_info("Score stats:\n * GOLD %s\n * PRED %s\n * DIFF %s" % evaler.score_stats())
    log_info("Common subtree stats:\n -- SIZE: %s\n -- ΔGLD: %s\n -- ΔPRD: %s" %
             evaler.common_substruct_stats())
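
Both examples above format their per-tree line from p_r_f1_from_counts(*corr_pred_gold(eval_ttree, gen_ttree)). The sketch below shows the standard precision/recall/F1 computation from (correct, predicted, gold) node counts, purely as an illustration; tgen's own p_r_f1_from_counts may treat edge cases differently.

# Illustrative only -- not tgen's implementation.
def p_r_f1_from_counts_sketch(correct, predicted, gold):
    """Precision/recall/F1 from raw counts: correctly predicted nodes,
    total predicted nodes, and total gold-standard nodes."""
    precision = float(correct) / predicted if predicted else 0.0
    recall = float(correct) / gold if gold else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall else 0.0)
    return precision, recall, f1

# e.g. 7 nodes correct out of 9 predicted, with 8 nodes in the gold tree:
print("P: %.4f R: %.4f F1: %.4f" % p_r_f1_from_counts_sketch(7, 9, 8))
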
Code example #3
File: run_tgen.py  Project: UFAL-DSG/tgen
def asearch_gen(args):
    """A*search generation"""
    from pytreex.core.document import Document

    opts, files = getopt(args, 'e:d:w:c:s:')
    eval_file = None
    fname_ttrees_out = None
    cfg_file = None
    eval_selector = ''

    for opt, arg in opts:
        if opt == '-e':
            eval_file = arg
        elif opt == '-s':
            eval_selector = arg
        elif opt == '-d':
            set_debug_stream(file_stream(arg, mode='w'))
        elif opt == '-w':
            fname_ttrees_out = arg
        elif opt == '-c':
            cfg_file = arg

    if len(files) != 3:
        sys.exit('Invalid arguments.\n' + __doc__)
    fname_cand_model, fname_rank_model, fname_da_test = files

    log_info('Initializing...')
    candgen = RandomCandidateGenerator.load_from_file(fname_cand_model)
    ranker = PerceptronRanker.load_from_file(fname_rank_model)
    cfg = Config(cfg_file) if cfg_file else {}
    cfg.update({'candgen': candgen, 'ranker': ranker})
    tgen = ASearchPlanner(cfg)

    log_info('Generating...')
    das = read_das(fname_da_test)

    if eval_file is None:
        gen_doc = Document()
    else:
        eval_doc = read_ttrees(eval_file)
        if eval_selector == tgen.selector:
            gen_doc = Document()
        else:
            gen_doc = eval_doc

    # generate and evaluate
    if eval_file is not None:
        # generate + analyze open&close lists
        lists_analyzer = ASearchListsAnalyzer()
        for num, (da, gold_tree) in enumerate(zip(
                das, trees_from_doc(eval_doc, tgen.language, eval_selector)),
                                              start=1):
            log_debug("\n\nTREE No. %03d" % num)
            gen_tree = tgen.generate_tree(da, gen_doc)
            lists_analyzer.append(gold_tree, tgen.open_list, tgen.close_list)
            if gen_tree != gold_tree:
                log_debug("\nDIFFING TREES:\n" +
                          tgen.ranker.diffing_trees_with_scores(
                              da, gold_tree, gen_tree) + "\n")

        log_info('Gold tree BEST: %.4f, on CLOSE: %.4f, on ANY list: %.4f' %
                 lists_analyzer.stats())

        # evaluate the generated trees against golden trees
        eval_ttrees = ttrees_from_doc(eval_doc, tgen.language, eval_selector)
        gen_ttrees = ttrees_from_doc(gen_doc, tgen.language, tgen.selector)

        log_info('Evaluating...')
        evaler = Evaluator()
        for eval_bundle, eval_ttree, gen_ttree, da in zip(
                eval_doc.bundles, eval_ttrees, gen_ttrees, das):
            # add some stats about the tree directly into the output file
            add_bundle_text(
                eval_bundle, tgen.language, tgen.selector + 'Xscore',
                "P: %.4f R: %.4f F1: %.4f" %
                p_r_f1_from_counts(*corr_pred_gold(eval_ttree, gen_ttree)))

            # collect overall stats
            evaler.append(eval_ttree, gen_ttree,
                          ranker.score(TreeData.from_ttree(eval_ttree), da),
                          ranker.score(TreeData.from_ttree(gen_ttree), da))
        # print overall stats
        log_info("NODE precision: %.4f, Recall: %.4f, F1: %.4f" %
                 evaler.p_r_f1())
        log_info("DEP  precision: %.4f, Recall: %.4f, F1: %.4f" %
                 evaler.p_r_f1(EvalTypes.DEP))
        log_info("Tree size stats:\n * GOLD %s\n * PRED %s\n * DIFF %s" %
                 evaler.size_stats())
        log_info("Score stats:\n * GOLD %s\n * PRED %s\n * DIFF %s" %
                 evaler.score_stats())
        log_info(
            "Common subtree stats:\n -- SIZE: %s\n -- ΔGLD: %s\n -- ΔPRD: %s" %
            evaler.common_substruct_stats())
    # just generate
    else:
        for da in das:
            tgen.generate_tree(da, gen_doc)

    # write output
    if fname_ttrees_out is not None:
        log_info('Writing output...')
        write_ttrees(gen_doc, fname_ttrees_out)
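
A hypothetical invocation mirroring the getopt signature above ('e:d:w:c:s:' plus three positional files: candidate-generator model, ranker model, test DAs); every file name here is a placeholder, not a file shipped with tgen.

# Hypothetical call; file names are placeholders.
asearch_gen(['-e', 'test-ref.yaml.gz',    # reference t-trees (enables evaluation)
             '-w', 'out-trees.yaml.gz',   # write the generated document here
             'candgen.pickle',            # RandomCandidateGenerator model
             'ranker.pickle',             # PerceptronRanker model
             'test-das.txt'])             # dialogue acts to generate from
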