Example #1
def train_test(train_passages, dev_passages, test_passages, args, model_suffix=""):
    """
    Train and test the parser on the given passages
    :param train_passages: passages to train on
    :param dev_passages: passages to evaluate on every iteration
    :param test_passages: passages to test on after training
    :param args: command-line arguments
    :param model_suffix: string to append to the model filename before the file extension
    :return: generator of Scores objects: dev scores for each training iteration (if dev is given), and finally test scores
    """
    model_files = [base + model_suffix + ext for base, ext in map(os.path.splitext, args.models or (args.classifier,))]
    p = Parser(model_files=model_files, config=Config(), beam=args.beam)
    yield from filter(None, p.train(train_passages, dev=dev_passages, test=test_passages, iterations=args.iterations))
    if test_passages:
        if args.train or args.folds:
            print("Evaluating on test passages")
        passage_scores = []
        evaluate = args.evaluate or train_passages
        for result in p.parse(test_passages, evaluate=evaluate, write=args.write):
            _, *score = result
            passage_scores += score
        if passage_scores:
            scores = Scores(passage_scores)
            if args.verbose <= 1 or len(passage_scores) > 1:
                print("\nAverage %s F1 score on test: %.3f" % (get_eval_type(scores), average_f1(scores)))
                print("Aggregated scores:")
                scores.print()
            print_scores(scores, args.testscores)
            yield scores
Example #2
def main_generator():
    args = Config().args
    assert args.passages or args.train, "Either passages or --train is required (use -h for help)"
    assert args.models or args.train or args.folds, "Either --model or --train or --folds is required"
    assert not (args.train or args.dev) or not args.folds, "--train and --dev are incompatible with --folds"
    assert args.train or not args.dev, "--dev is only possible together with --train"
    if args.folds:
        fold_scores = []
        all_passages = list(read_passages(args, args.passages))
        assert len(all_passages) >= args.folds, \
            "%d folds are not possible with only %d passages" % (args.folds, len(all_passages))
        Config().random.shuffle(all_passages)
        folds = [all_passages[i::args.folds] for i in range(args.folds)]
        for i in range(args.folds):
            print("Fold %d of %d:" % (i + 1, args.folds))
            dev_passages = folds[i]
            test_passages = folds[(i + 1) % args.folds]
            train_passages = [passage for fold in folds if fold is not dev_passages and fold is not test_passages
                              for passage in fold]
            s = list(train_test(train_passages, dev_passages, test_passages, args, "_%d" % i))
            if s and s[-1] is not None:
                fold_scores.append(s[-1])
        if fold_scores:
            scores = Scores(fold_scores)
            print("Average test F1 score for each fold: " + ", ".join("%.3f" % average_f1(s) for s in fold_scores))
            print("Aggregated scores across folds:\n")
            scores.print()
            yield scores
    else:  # Simple train/dev/test by given arguments
        train_passages, dev_passages, test_passages = [read_passages(args, arg) for arg in
                                                       (args.train, args.dev, args.passages)]
        yield from train_test(train_passages, dev_passages, test_passages, args)
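Usage note: both generator functions above yield Scores objects lazily, so nothing runs until a caller drains them. A minimal, hypothetical entry point (not part of the original example) might look like this:

def main():
    # Drain main_generator() so all folds (or the single train/dev/test
    # run) actually execute; keep only the last Scores object yielded.
    final_scores = None
    for final_scores in main_generator():
        pass
    return final_scores

if __name__ == "__main__":
    main()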
Example #3
 def eval(self, passages, mode, scores_filename, display=True):
     print("Evaluating on %s passages" % mode.name)
     passage_scores = [s for _, s in self.parse(passages, mode=mode, evaluate=True, display=display)]
     scores = Scores(passage_scores)
     average_score = average_f1(scores)
     prefix = ".".join(map(str, [self.iteration, self.epoch] + (
         [self.batch] if self.config.args.save_every else [])))
     if display:
         print("Evaluation %s, average %s F1 score on %s: %.3f%s" % (prefix, get_eval_type(scores), mode.name,
                                                                     average_score, scores.details(average_f1)))
     print_scores(scores, scores_filename, prefix=prefix, prefix_title="iteration")
     return average_score, scores
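Usage note: a hedged sketch of calling this method from a training loop (the parser instance, dev passages and mode object are hypothetical; the mode only needs the .name attribute used above):

# Assumes `parser` is an instance of the class defining eval() above.
best_average = float("-inf")
average, scores = parser.eval(dev_passages, mode=dev_mode,
                              scores_filename="dev_scores.csv")
if average > best_average:  # e.g. keep the best checkpoint
    best_average = average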
Example #4
def main(args):
    for spec in read_specs(args, converters=FROM_FORMAT):
        scores = []
        if not args.verbose:
            spec.passages = tqdm(
                spec.passages,
                unit=" passages",
                desc="Parsing " +
                (spec.out_dir if spec.out_dir != "." else spec.lang))
        for passage, parsed in parse(spec.passages, spec.lang, spec.udpipe,
                                     args.verbose):
            map_labels(parsed, args.label_map)
            normalize(parsed, extra=True)
            if args.write:
                write_passage(parsed, args)
            if args.evaluate:
                evaluator = EVALUATORS.get(args.output_format)
                converter = TO_FORMAT.get(args.output_format)
                if converter is not None:
                    passage, parsed = map(converter, (passage, parsed))
                if evaluator is not None:
                    scores.append(
                        evaluator.evaluate(parsed,
                                           passage,
                                           verbose=args.verbose > 1))
        if scores:
            Scores(scores).print()
Example #5
def main(args):
    for spec in read_specs(args, converters=CONVERTERS):
        scores = []
        sentences, to_parse = tee(
            (to_conllu_native(p),
             to_conllu_native(p, test=True, enhanced=False)
             ) if isinstance(p, core.Passage) else (p, strip_enhanced(p))
            for p in spec.passages)
        t = tqdm(zip((x for x, _ in sentences),
                     split_by_empty_lines(
                         udpipe((x for _, x in to_parse), spec.udpipe,
                                args.verbose))),
                 unit=" sentences")
        for sentence, parsed in t:
            sentence = list(sentence)
            if args.write:
                i = next(find_ids(sentence))
                t.set_postfix(id=i)
                with open_out_file(spec, i) as f:
                    for line in parsed:
                        print(line, file=f)
            if args.evaluate:
                scores.append(
                    evaluate(parsed, sentence, verbose=args.verbose > 1))
        if scores:
            Scores(scores).print()
Example #6
def diff(results, indices=None, verbose=False):
    scores = [Scores(r if indices is None else [r[i] for i in indices]) for r in results]
    fields = np.array([s.fields() for s in scores], dtype=float)
    if verbose:
        print(" ".join(evaluation.Scores.field_titles()))
        print("\n".join(map(str, fields)))
    return fields[1] - fields[0]
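Usage note: a hedged sketch of comparing two runs with diff() (baseline_results and new_results are hypothetical lists of per-passage score objects accepted by Scores()):

# Returns fields[1] - fields[0], i.e. new minus baseline, one value per field.
delta = diff([baseline_results, new_results], verbose=True)
print("Per-field change (new - baseline):", delta)
# Restrict the comparison to selected passages by index:
delta_subset = diff([baseline_results, new_results], indices=[0, 2, 5])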
Example #7
def test_parser(config, model_type, formats, default_setting, text=True):
    filename = "test_files/models/%s_%s%s" % ("_".join(formats), model_type,
                                              default_setting.suffix())
    remove_existing(filename)
    config.update(default_setting.dict())
    scores = []
    params = []
    passages = list(map(load_passage, passage_files(*formats)))
    evaluate = ("amr" not in formats)
    for mode in "train", "load":
        print("-- %sing %s" % (mode, model_type))
        config.update(dict(classifier=model_type, copy_shared=None))
        p = Parser(model_files=filename, config=config)
        p.save_init = True
        list(
            p.train(passages if mode == "train" else None,
                    dev=passages,
                    test=True,
                    iterations=2))
        assert p.model.is_finalized, "Model should be finalized after %sing" % mode
        assert not getattr(p.model.feature_extractor, "node_dropout",
                           0), p.model.feature_extractor.node_dropout
        all_params = p.model.all_params()
        params.append(all_params)
        param1, param2 = [
            d.get("W") for d in (all_params, p.model.feature_extractor.params)
        ]
        if param1 is not None and param2 and param2.init is not None and not config.args.update_word_vectors:
            assert_allclose(param1,
                            weight_decay(p.model) * param2.init,
                            rtol=1e-6)
        text_results = results = list(p.parse(passages, evaluate=evaluate))
        if text:
            print("Converting to text and parsing...")
            text_results = list(
                p.parse([
                    p3 for p1 in passages
                    for p2 in convert.to_text(p1, sentences=False)
                    for p3 in convert.from_text(
                        p2, p1.ID, extra_format=p1.extra.get("format"))
                ]))
            assert len(results) == len(text_results)
        if evaluate:
            scores.append(Scores(tuple(zip(*results))[1]).average_f1())
            if text:
                for t, (r, s) in zip(text_results, results):
                    print("  %s F1=%.3f" % (r.ID, s.average_f1()))
        assert not list(p.parse(()))  # parsing nothing returns nothing
        print()
    assert_all_params_equal(*params)
    if evaluate:
        print("-- average f1: %.3f, %.3f\n" % tuple(scores))
        assert scores[0] == pytest.approx(scores[1], 0.1)
Example #8
def main():
    argparser = configargparse.ArgParser(description=desc)
    argparser.add_argument("filenames", nargs="+", help="file names to convert and evaluate")
    add_verbose_arg(argparser, help="detailed evaluation output")
    add_boolean_option(argparser, "wikification", "Spotlight to wikify any named node (for AMR)")
    argparser.add_argument("-o", "--out-dir", help="output directory (if unspecified, files are not written)")
    args = argparser.parse_args()

    scores = []
    for pattern in args.filenames:
        filenames = glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            print("\rConverting '%s'" % filename, end="")
            if args.out_dir or args.verbose:
                print(flush=True)
            basename, ext = os.path.splitext(os.path.basename(filename))
            passage_format = ext.lstrip(".")
            converters = CONVERTERS.get(passage_format, CONVERTERS["amr"])
            evaluator = EVALUATORS.get(passage_format, EVALUATORS["amr"]).evaluate
            with open(filename, encoding="utf-8") as f:
                for passage, ref, passage_id in converters[0](f, passage_id=basename, return_original=True):
                    if args.out_dir:
                        os.makedirs(args.out_dir, exist_ok=True)
                        outfile = "%s/%s.xml" % (args.out_dir, passage.ID)
                        print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        ioutil.passage2file(passage, outfile)
                    try:
                        guessed = converters[1](passage, wikification=args.wikification, use_original=False)
                    except Exception as e:
                        raise ValueError("Error converting %s back from %s" % (filename, passage_format)) from e
                    if args.out_dir:
                        outfile = "%s/%s%s" % (args.out_dir, passage.ID, ext)
                        print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        with open(outfile, "w", encoding="utf-8") as f_out:
                            print("\n".join(guessed), file=f_out)
                    try:
                        s = evaluator(guessed, ref, verbose=args.verbose > 1)
                    except Exception as e:
                        raise ValueError("Error evaluating conversion of %s" % filename) from e
                    scores.append(s)
                    if args.verbose:
                        print(passage_id)
                        s.print()
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
    Scores(scores).print()

    sys.exit(0)
Example #9
def main(args):
    for passages, out_dir, lang in read_specs(args):
        scores = []
        if not args.verbose:
            passages = tqdm(passages, unit=" passages", desc="Parsing " + out_dir)
        for passage, parsed in ANNOTATORS[args.parser](passages, lang, args.verbose):
            if args.write:
                write_passage(parsed, args)
            else:
                map_labels(parsed, args.label_map)
            if args.evaluate:
                evaluator = EVALUATORS[args.output_format]
                _, converter = CONVERTERS[args.output_format]
                if converter is not None:
                    passage, parsed = map(converter, (passage, parsed))
                scores.append(evaluator.evaluate(parsed, passage, verbose=args.verbose > 1))
        if scores:
            Scores(scores).print()
Example #10
def main(args):
    scores = []
    for pattern in args.filenames:
        filenames = glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            print("\rConverting '%s'" % filename, end="")
            if args.out_dir or args.verbose:
                print(flush=True)
            basename, ext = os.path.splitext(os.path.basename(filename))
            passage_format = ext.lstrip(".")
            converters = CONVERTERS.get(passage_format, CONVERTERS["amr"])
            evaluator = EVALUATORS.get(passage_format, EVALUATORS["amr"]).evaluate
            with open(filename, encoding="utf-8") as f:
                for passage, ref, passage_id in converters[0](f, passage_id=basename, return_original=True):
                    if args.normalize:
                        normalize(passage, extra=args.extra_normalization)
                    if args.out_dir:
                        os.makedirs(args.out_dir, exist_ok=True)
                        outfile = "%s/%s.xml" % (args.out_dir, passage.ID)
                        print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        ioutil.passage2file(passage, outfile)
                    try:
                        guessed = converters[1](passage, wikification=args.wikification, use_original=False)
                    except Exception as e:
                        raise ValueError("Error converting %s back from %s" % (filename, passage_format)) from e
                    if args.out_dir:
                        outfile = "%s/%s%s" % (args.out_dir, passage.ID, ext)
                        print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        with open(outfile, "w", encoding="utf-8") as f_out:
                            print("\n".join(guessed), file=f_out)
                    try:
                        s = evaluator(guessed, ref, verbose=args.verbose > 1)
                    except Exception as e:
                        raise ValueError("Error evaluating conversion of %s" % filename) from e
                    scores.append(s)
                    if args.verbose:
                        print(passage_id)
                        s.print()
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
    Scores(scores).print()
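Usage note: a hedged sketch of driving this main() directly (the attribute names match the code above; the paths and values are hypothetical, and in the real script they would come from an argument parser):

from argparse import Namespace

main(Namespace(filenames=["data/*.amr"],        # glob patterns to convert
               out_dir="converted",             # or None to skip writing files
               verbose=1,
               wikification=False,
               normalize=True,
               extra_normalization=False))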
Example #11
def main(args):
    for spec in read_specs(args, converters=CONVERTERS):
        scores = []
        sentences1, sentences2 = tee(spec.passages)
        t = tqdm(zip(
            sentences1,
            split_by_empty_lines(udpipe(sentences2, spec.udpipe,
                                        args.verbose))),
                 unit=" sentences")
        for sentence, parsed in t:
            sentence = list(sentence)
            if args.write:
                i = next(find_ids(sentence))
                t.set_postfix(id=i)
                with open(os.path.join(spec.out_dir, i + ".conllu"),
                          "w",
                          encoding="utf-8") as f:
                    for line in parsed:
                        print(line, file=f)
            if args.evaluate:
                scores.append(
                    evaluate(parsed, sentence, verbose=args.verbose > 1))
        if scores:
            Scores(scores).print()
Example #12
def main(args):
    if args.out_dir:
        os.makedirs(args.out_dir, exist_ok=True)
    scores = []
    for pattern in args.filenames:
        for filename in glob(pattern) or [pattern]:
            file_scores = []
            basename, ext = os.path.splitext(os.path.basename(filename))
            passage_format = ext.lstrip(".")
            if passage_format == "txt":
                passage_format = args.format
            in_converter, out_converter = CONVERTERS.get(
                passage_format, CONVERTERS[args.format])
            evaluate = EVALUATORS.get(passage_format, EVALUATORS[args.format])
            with open(filename, encoding="utf-8") as f:
                t = tqdm(in_converter(f,
                                      passage_id=basename,
                                      return_original=True),
                         unit=" passages",
                         desc=("Converting '%s'" % filename) +
                         ((", writing to '%s'" %
                           args.out_dir) if args.out_dir else ""))
                for passage, ref, passage_id in t:
                    if args.normalize:
                        normalize(passage, extra=args.extra_normalization)
                    if args.out_dir:
                        os.makedirs(args.out_dir, exist_ok=True)
                        outfile = os.path.join(args.out_dir,
                                               passage.ID + ".xml")
                        if args.verbose:
                            with ioutil.external_write_mode():
                                print("Writing '%s'..." % outfile,
                                      file=sys.stderr,
                                      flush=True)
                        ioutil.passage2file(passage, outfile)
                    try:
                        guessed = out_converter(passage,
                                                wikification=args.wikification,
                                                use_original=False)
                    except Exception as e:
                        raise ValueError("Error converting %s back from %s" %
                                         (filename, passage_format)) from e
                    if args.out_dir:
                        outfile = os.path.join(args.out_dir, passage.ID + ext)
                        if args.verbose:
                            with ioutil.external_write_mode():
                                print("Writing '%s'..." % outfile,
                                      file=sys.stderr,
                                      flush=True)
                        with open(outfile, "w", encoding="utf-8") as f_out:
                            print("\n".join(guessed), file=f_out)
                    try:
                        s = evaluate(guessed,
                                     ref,
                                     verbose=args.verbose > 1,
                                     units=args.units)
                    except Exception as e:
                        raise ValueError("Error evaluating conversion of %s" %
                                         filename) from e
                    file_scores.append(s)
                    if args.verbose:
                        with ioutil.external_write_mode():
                            print(passage_id)
                            s.print()
                    t.set_postfix(F1="%.2f" %
                                  (100.0 * Scores(file_scores).average_f1()))
            scores += file_scores
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
    Scores(scores).print()
Example #13
                            if not os.path.isdir(os.path.join(d, f))
                        ] if os.path.isdir(d) else [d]
                        for d in (os.path.join(submission_dir, track, lang),
                                  os.path.join(truth_dir, lang), None)
                    ]

                    evaluate = EVALUATORS.get(
                        passage_format(files[1][0])[1], EVALUATORS["amr"])
                    results = list(
                        evaluate_all(evaluate,
                                     files,
                                     format="amr",
                                     name="Evaluating",
                                     unlabeled=False,
                                     matching_ids=True))
                    summary = Scores(results)

                    # write results to html file and append to values
                    output_html_file.write("<tr>\n"
                                           "<td>%s</td>" % competition)

                    # labeled
                    output_html_file.write("<td>%.3f</td>\n" %
                                           (summary.average_f1(LABELED)))
                    values.append(round(summary.average_f1(LABELED), 3))
                    for (title, field) in zip(summary.titles(LABELED),
                                              summary.fields(LABELED)):
                        output_html_file.write("<td>%.3f</td>\n" %
                                               (float(field)))
                        values.append(float(field))