def train_test(train_passages, dev_passages, test_passages, args, model_suffix=""):
    """
    Train and test parser on the given passages
    :param train_passages: passages to train on
    :param dev_passages: passages to evaluate on every iteration
    :param test_passages: passages to test on after training
    :param args: parsed command-line arguments
    :param model_suffix: string to append to model filename before the file extension
    :return: generator of Scores objects: dev scores for each training iteration (if dev is given),
             and finally test scores
    """
    model_files = [base + model_suffix + ext for base, ext in map(os.path.splitext, args.models or (args.classifier,))]
    p = Parser(model_files=model_files, config=Config(), beam=args.beam)
    yield from filter(None, p.train(train_passages, dev=dev_passages, test=test_passages,
                                    iterations=args.iterations))
    if test_passages:
        if args.train or args.folds:
            print("Evaluating on test passages")
        passage_scores = []
        evaluate = args.evaluate or train_passages
        for result in p.parse(test_passages, evaluate=evaluate, write=args.write):
            _, *score = result
            passage_scores += score
        if passage_scores:
            scores = Scores(passage_scores)
            if args.verbose <= 1 or len(passage_scores) > 1:
                print("\nAverage %s F1 score on test: %.3f" % (get_eval_type(scores), average_f1(scores)))
                print("Aggregated scores:")
                scores.print()
            print_scores(scores, args.testscores)
            yield scores
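# Illustration only (not project code): the generator contract documented in train_test above.
# Every yield but the last is a per-iteration dev Scores object, and the final yield is the test
# Scores object (assuming both dev and test passages were given). The stand-in strings let the
# sketch run without training anything.
def _consume_train_test_demo(score_generator):
    *dev_scores, test_scores = list(score_generator)  # requires at least one yield
    return dev_scores, test_scores

# _consume_train_test_demo(iter(["dev1", "dev2", "test"])) == (["dev1", "dev2"], "test")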
def main_generator():
    args = Config().args
    assert args.passages or args.train, "Either passages or --train is required (use -h for help)"
    assert args.models or args.train or args.folds, "Either --model or --train or --folds is required"
    assert not (args.train or args.dev) or not args.folds, "--train and --dev are incompatible with --folds"
    assert args.train or not args.dev, "--dev is only possible together with --train"
    if args.folds:
        fold_scores = []
        all_passages = list(read_passages(args, args.passages))
        assert len(all_passages) >= args.folds, \
            "%d folds are not possible with only %d passages" % (args.folds, len(all_passages))
        Config().random.shuffle(all_passages)
        folds = [all_passages[i::args.folds] for i in range(args.folds)]
        for i in range(args.folds):
            print("Fold %d of %d:" % (i + 1, args.folds))
            dev_passages = folds[i]
            test_passages = folds[(i + 1) % args.folds]
            train_passages = [passage for fold in folds
                              if fold is not dev_passages and fold is not test_passages
                              for passage in fold]
            s = list(train_test(train_passages, dev_passages, test_passages, args, "_%d" % i))
            if s and s[-1] is not None:
                fold_scores.append(s[-1])
        if fold_scores:
            scores = Scores(fold_scores)
            print("Average test F1 score for each fold: " + ", ".join("%.3f" % average_f1(s) for s in fold_scores))
            print("Aggregated scores across folds:\n")
            scores.print()
            yield scores
    else:  # Simple train/dev/test by given arguments
        train_passages, dev_passages, test_passages = [read_passages(args, arg) for arg in
                                                       (args.train, args.dev, args.passages)]
        yield from train_test(train_passages, dev_passages, test_passages, args)
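# Illustration only (not project code): how the stride-based slicing in main_generator splits a
# shuffled passage list into folds. Plain Python, with integers standing in for passages.
def _fold_split_demo(num_passages=10, num_folds=3):
    all_passages = list(range(num_passages))
    folds = [all_passages[i::num_folds] for i in range(num_folds)]
    # e.g. [[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]]: each passage lands in exactly one fold,
    # and fold sizes differ by at most one.
    assert sorted(p for fold in folds for p in fold) == all_passages
    return folds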
def eval(self, passages, mode, scores_filename, display=True):
    print("Evaluating on %s passages" % mode.name)
    passage_scores = [s for _, s in self.parse(passages, mode=mode, evaluate=True, display=display)]
    scores = Scores(passage_scores)
    average_score = average_f1(scores)
    prefix = ".".join(map(str, [self.iteration, self.epoch] +
                          ([self.batch] if self.config.args.save_every else [])))
    if display:
        print("Evaluation %s, average %s F1 score on %s: %.3f%s"
              % (prefix, get_eval_type(scores), mode.name, average_score, scores.details(average_f1)))
    print_scores(scores, scores_filename, prefix=prefix, prefix_title="iteration")
    return average_score, scores
def main(args):
    for spec in read_specs(args, converters=FROM_FORMAT):
        scores = []
        if not args.verbose:
            spec.passages = tqdm(spec.passages, unit=" passages",
                                 desc="Parsing " + (spec.out_dir if spec.out_dir != "." else spec.lang))
        for passage, parsed in parse(spec.passages, spec.lang, spec.udpipe, args.verbose):
            map_labels(parsed, args.label_map)
            normalize(parsed, extra=True)
            if args.write:
                write_passage(parsed, args)
            if args.evaluate:
                evaluator = EVALUATORS.get(args.output_format)
                converter = TO_FORMAT.get(args.output_format)
                if converter is not None:
                    passage, parsed = map(converter, (passage, parsed))
                if evaluator is not None:
                    scores.append(evaluator.evaluate(parsed, passage, verbose=args.verbose > 1))
        if scores:
            Scores(scores).print()
def main(args):
    for spec in read_specs(args, converters=CONVERTERS):
        scores = []
        sentences, to_parse = tee((to_conllu_native(p), to_conllu_native(p, test=True, enhanced=False))
                                  if isinstance(p, core.Passage) else (p, strip_enhanced(p))
                                  for p in spec.passages)
        t = tqdm(zip((x for x, _ in sentences),
                     split_by_empty_lines(udpipe((x for _, x in to_parse), spec.udpipe, args.verbose))),
                 unit=" sentences")
        for sentence, parsed in t:
            sentence = list(sentence)
            if args.write:
                i = next(find_ids(sentence))
                t.set_postfix(id=i)
                with open_out_file(spec, i) as f:
                    for line in parsed:
                        print(line, file=f)
            if args.evaluate:
                scores.append(evaluate(parsed, sentence, verbose=args.verbose > 1))
        if scores:
            Scores(scores).print()
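# Illustration only (not project code): the itertools.tee pattern used above. One copy of the
# lazy stream feeds the external parser while the other is kept to pair each input with its
# output; str.upper stands in for the udpipe call so the sketch runs on its own.
from itertools import tee

def _tee_demo(sentences=("a b", "c d")):
    originals, to_parse = tee(sentences)       # two independent iterators over the same items
    parsed = (s.upper() for s in to_parse)     # stand-in for the external parsing step
    return list(zip(originals, parsed))        # pair each input with its parse, as in the tqdm loop

# _tee_demo() == [("a b", "A B"), ("c d", "C D")]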
def diff(results, indices=None, verbose=False):
    scores = [Scores(r if indices is None else [r[i] for i in indices]) for r in results]
    fields = np.array([s.fields() for s in scores], dtype=float)
    if verbose:
        print(" ".join(evaluation.Scores.field_titles()))
        print("\n".join(map(str, fields)))
    return fields[1] - fields[0]
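# Illustration only (not project code): the array arithmetic diff() relies on. With exactly two
# result sets, stacking their score fields row-wise and subtracting row 0 from row 1 yields the
# per-field change of the second run relative to the first; the numbers below are made up.
import numpy as np

def _diff_demo():
    fields = np.array([[0.70, 0.65, 0.72],    # fields() of the first Scores object
                       [0.74, 0.66, 0.71]])   # fields() of the second Scores object
    return fields[1] - fields[0]              # per-field deltas: second run minus first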
def test_parser(config, model_type, formats, default_setting, text=True):
    filename = "test_files/models/%s_%s%s" % ("_".join(formats), model_type, default_setting.suffix())
    remove_existing(filename)
    config.update(default_setting.dict())
    scores = []
    params = []
    passages = list(map(load_passage, passage_files(*formats)))
    evaluate = ("amr" not in formats)
    for mode in "train", "load":
        print("-- %sing %s" % (mode, model_type))
        config.update(dict(classifier=model_type, copy_shared=None))
        p = Parser(model_files=filename, config=config)
        p.save_init = True
        list(p.train(passages if mode == "train" else None, dev=passages, test=True, iterations=2))
        assert p.model.is_finalized, "Model should be finalized after %sing" % mode
        assert not getattr(p.model.feature_extractor, "node_dropout", 0), p.model.feature_extractor.node_dropout
        all_params = p.model.all_params()
        params.append(all_params)
        param1, param2 = [d.get("W") for d in (all_params, p.model.feature_extractor.params)]
        if param1 is not None and param2 and param2.init is not None and not config.args.update_word_vectors:
            assert_allclose(param1, weight_decay(p.model) * param2.init, rtol=1e-6)
        text_results = results = list(p.parse(passages, evaluate=evaluate))
        if text:
            print("Converting to text and parsing...")
            text_results = list(p.parse([p3 for p1 in passages
                                         for p2 in convert.to_text(p1, sentences=False)
                                         for p3 in convert.from_text(p2, p1.ID, extra_format=p1.extra.get("format"))]))
            assert len(results) == len(text_results)
        if evaluate:
            scores.append(Scores(tuple(zip(*results))[1]).average_f1())
            if text:
                for t, (r, s) in zip(text_results, results):
                    print(" %s F1=%.3f" % (r.ID, s.average_f1()))
        assert not list(p.parse(()))  # parsing nothing returns nothing
        print()
    assert_all_params_equal(*params)
    if evaluate:
        print("-- average f1: %.3f, %.3f\n" % tuple(scores))
        assert scores[0] == pytest.approx(scores[1], 0.1)
def main():
    argparser = configargparse.ArgParser(description=desc)
    argparser.add_argument("filenames", nargs="+", help="file names to convert and evaluate")
    add_verbose_arg(argparser, help="detailed evaluation output")
    add_boolean_option(argparser, "wikification", "Spotlight to wikify any named node (for AMR)")
    argparser.add_argument("-o", "--out-dir", help="output directory (if unspecified, files are not written)")
    args = argparser.parse_args()
    scores = []
    for pattern in args.filenames:
        filenames = glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            print("\rConverting '%s'" % filename, end="")
            if args.out_dir or args.verbose:
                print(flush=True)
            basename, ext = os.path.splitext(os.path.basename(filename))
            passage_format = ext.lstrip(".")
            converters = CONVERTERS.get(passage_format, CONVERTERS["amr"])
            evaluator = EVALUATORS.get(passage_format, EVALUATORS["amr"]).evaluate
            with open(filename, encoding="utf-8") as f:
                for passage, ref, passage_id in converters[0](f, passage_id=basename, return_original=True):
                    if args.out_dir:
                        os.makedirs(args.out_dir, exist_ok=True)
                        outfile = "%s/%s.xml" % (args.out_dir, passage.ID)
                        print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        ioutil.passage2file(passage, outfile)
                    try:
                        guessed = converters[1](passage, wikification=args.wikification, use_original=False)
                    except Exception as e:
                        raise ValueError("Error converting %s back from %s" % (filename, passage_format)) from e
                    if args.out_dir:
                        outfile = "%s/%s%s" % (args.out_dir, passage.ID, ext)
                        print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        with open(outfile, "w", encoding="utf-8") as f_out:
                            print("\n".join(guessed), file=f_out)
                    try:
                        s = evaluator(guessed, ref, verbose=args.verbose > 1)
                    except Exception as e:
                        raise ValueError("Error evaluating conversion of %s" % filename) from e
                    scores.append(s)
                    if args.verbose:
                        print(passage_id)
                        s.print()
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
        Scores(scores).print()
    sys.exit(0)
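# Illustration only (not project code): the filename-to-format step used in main() above. The
# extension (without its dot) is the key into CONVERTERS/EVALUATORS, with "amr" as the fallback
# for unknown formats.
import os

def _passage_format_demo(filename):
    basename, ext = os.path.splitext(os.path.basename(filename))
    return basename, ext.lstrip(".")

# _passage_format_demo("data/wiki.sdp") == ("wiki", "sdp")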
def main(args):
    for passages, out_dir, lang in read_specs(args):
        scores = []
        if not args.verbose:
            passages = tqdm(passages, unit=" passages", desc="Parsing " + out_dir)
        for passage, parsed in ANNOTATORS[args.parser](passages, lang, args.verbose):
            if args.write:
                write_passage(parsed, args)
            else:
                map_labels(parsed, args.label_map)
            if args.evaluate:
                evaluator = EVALUATORS[args.output_format]
                _, converter = CONVERTERS[args.output_format]
                if converter is not None:
                    passage, parsed = map(converter, (passage, parsed))
                scores.append(evaluator.evaluate(parsed, passage, verbose=args.verbose > 1))
        if scores:
            Scores(scores).print()
def main(args):
    scores = []
    for pattern in args.filenames:
        filenames = glob(pattern)
        if not filenames:
            raise IOError("Not found: " + pattern)
        for filename in filenames:
            print("\rConverting '%s'" % filename, end="")
            if args.out_dir or args.verbose:
                print(flush=True)
            basename, ext = os.path.splitext(os.path.basename(filename))
            passage_format = ext.lstrip(".")
            converters = CONVERTERS.get(passage_format, CONVERTERS["amr"])
            evaluator = EVALUATORS.get(passage_format, EVALUATORS["amr"]).evaluate
            with open(filename, encoding="utf-8") as f:
                for passage, ref, passage_id in converters[0](f, passage_id=basename, return_original=True):
                    if args.normalize:
                        normalize(passage, extra=args.extra_normalization)
                    if args.out_dir:
                        os.makedirs(args.out_dir, exist_ok=True)
                        outfile = "%s/%s.xml" % (args.out_dir, passage.ID)
                        print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        ioutil.passage2file(passage, outfile)
                    try:
                        guessed = converters[1](passage, wikification=args.wikification, use_original=False)
                    except Exception as e:
                        raise ValueError("Error converting %s back from %s" % (filename, passage_format)) from e
                    if args.out_dir:
                        outfile = "%s/%s%s" % (args.out_dir, passage.ID, ext)
                        print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        with open(outfile, "w", encoding="utf-8") as f_out:
                            print("\n".join(guessed), file=f_out)
                    try:
                        s = evaluator(guessed, ref, verbose=args.verbose > 1)
                    except Exception as e:
                        raise ValueError("Error evaluating conversion of %s" % filename) from e
                    scores.append(s)
                    if args.verbose:
                        print(passage_id)
                        s.print()
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
        Scores(scores).print()
def main(args):
    for spec in read_specs(args, converters=CONVERTERS):
        scores = []
        sentences1, sentences2 = tee(spec.passages)
        t = tqdm(zip(sentences1, split_by_empty_lines(udpipe(sentences2, spec.udpipe, args.verbose))),
                 unit=" sentences")
        for sentence, parsed in t:
            sentence = list(sentence)
            if args.write:
                i = next(find_ids(sentence))
                t.set_postfix(id=i)
                with open(os.path.join(spec.out_dir, i + ".conllu"), "w", encoding="utf-8") as f:
                    for line in parsed:
                        print(line, file=f)
            if args.evaluate:
                scores.append(evaluate(parsed, sentence, verbose=args.verbose > 1))
        if scores:
            Scores(scores).print()
def main(args):
    if args.out_dir:
        os.makedirs(args.out_dir, exist_ok=True)
    scores = []
    for pattern in args.filenames:
        for filename in glob(pattern) or [pattern]:
            file_scores = []
            basename, ext = os.path.splitext(os.path.basename(filename))
            passage_format = ext.lstrip(".")
            if passage_format == "txt":
                passage_format = args.format
            in_converter, out_converter = CONVERTERS.get(passage_format, CONVERTERS[args.format])
            evaluate = EVALUATORS.get(passage_format, EVALUATORS[args.format])
            with open(filename, encoding="utf-8") as f:
                t = tqdm(in_converter(f, passage_id=basename, return_original=True), unit=" passages",
                         desc=("Converting '%s'" % filename) +
                              ((", writing to '%s'" % args.out_dir) if args.out_dir else ""))
                for passage, ref, passage_id in t:
                    if args.normalize:
                        normalize(passage, extra=args.extra_normalization)
                    if args.out_dir:
                        os.makedirs(args.out_dir, exist_ok=True)
                        outfile = os.path.join(args.out_dir, passage.ID + ".xml")
                        if args.verbose:
                            with ioutil.external_write_mode():
                                print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        ioutil.passage2file(passage, outfile)
                    try:
                        guessed = out_converter(passage, wikification=args.wikification, use_original=False)
                    except Exception as e:
                        raise ValueError("Error converting %s back from %s" % (filename, passage_format)) from e
                    if args.out_dir:
                        outfile = os.path.join(args.out_dir, passage.ID + ext)
                        if args.verbose:
                            with ioutil.external_write_mode():
                                print("Writing '%s'..." % outfile, file=sys.stderr, flush=True)
                        with open(outfile, "w", encoding="utf-8") as f_out:
                            print("\n".join(guessed), file=f_out)
                    try:
                        s = evaluate(guessed, ref, verbose=args.verbose > 1, units=args.units)
                    except Exception as e:
                        raise ValueError("Error evaluating conversion of %s" % filename) from e
                    file_scores.append(s)
                    if args.verbose:
                        with ioutil.external_write_mode():
                            print(passage_id)
                            s.print()
                    t.set_postfix(F1="%.2f" % (100.0 * Scores(file_scores).average_f1()))
            scores += file_scores
    print()
    if args.verbose and len(scores) > 1:
        print("Aggregated scores:")
        Scores(scores).print()
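# Illustration only (not project code): the `glob(pattern) or [pattern]` idiom above. glob()
# returns an empty list when nothing matches, so `or` falls back to treating the pattern as a
# literal filename instead of skipping it (contrast with the variants above, which raise IOError
# on an empty match).
from glob import glob

def _expand_demo(pattern):
    return glob(pattern) or [pattern]

# _expand_demo("*.xml") -> matching filenames if any exist, otherwise ["*.xml"]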
              if not os.path.isdir(os.path.join(d, f))]
             if os.path.isdir(d) else [d]
             for d in (os.path.join(submission_dir, track, lang), os.path.join(truth_dir, lang), None)]
evaluate = EVALUATORS.get(passage_format(files[1][0])[1], EVALUATORS["amr"])
results = list(evaluate_all(evaluate, files, format="amr", name="Evaluating",
                            unlabeled=False, matching_ids=True))
summary = Scores(results)
# write results to html file and append to values
output_html_file.write("<tr>\n"
                       "<td>%s</td>" % competition)
# labeled
output_html_file.write("<td>%.3f</td>\n" % (summary.average_f1(LABELED)))
values.append(round(summary.average_f1(LABELED), 3))
for (title, field) in zip(summary.titles(LABELED), summary.fields(LABELED)):
    output_html_file.write("<td>%.3f</td>\n" % (float(field)))
    values.append(float(field))