def main_generator():
    args = Config().args
    assert args.passages or args.train, "Either passages or --train is required (use -h for help)"
    assert args.models or args.train or args.folds, "Either --model or --train or --folds is required"
    assert not (args.train or args.dev) or not args.folds, "--train and --dev are incompatible with --folds"
    assert args.train or not args.dev, "--dev is only possible together with --train"
    if args.folds:
        fold_scores = []
        all_passages = list(read_passages(args, args.passages))
        assert len(all_passages) >= args.folds, \
            "%d folds are not possible with only %d passages" % (args.folds, len(all_passages))
        Config().random.shuffle(all_passages)
        folds = [all_passages[i::args.folds] for i in range(args.folds)]
        for i in range(args.folds):
            print("Fold %d of %d:" % (i + 1, args.folds))
            dev_passages = folds[i]
            test_passages = folds[(i + 1) % args.folds]
            train_passages = [passage for fold in folds
                              if fold is not dev_passages and fold is not test_passages
                              for passage in fold]
            s = list(train_test(train_passages, dev_passages, test_passages, args, "_%d" % i))
            if s and s[-1] is not None:
                fold_scores.append(s[-1])
        if fold_scores:
            scores = Scores(fold_scores)
            print("Average test F1 score for each fold: " + ", ".join("%.3f" % average_f1(s) for s in fold_scores))
            print("Aggregated scores across folds:\n")
            scores.print()
            yield scores
    else:  # Simple train/dev/test by given arguments
        train_passages, dev_passages, test_passages = [read_passages(args, arg)
                                                       for arg in (args.train, args.dev, args.passages)]
        yield from train_test(train_passages, dev_passages, test_passages, args)
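
# A minimal sketch (not part of the parser) illustrating the cross-validation split
# used in main_generator() above: items are dealt into k striped folds, fold i serves
# as the dev set, fold (i + 1) % k as the test set, and the remaining folds as training
# data. The helper name below is hypothetical and exists only for illustration.
def _fold_split_example(items, k=3):
    folds = [items[j::k] for j in range(k)]
    for i in range(k):
        dev, test = folds[i], folds[(i + 1) % k]
        train = [x for fold in folds if fold is not dev and fold is not test for x in fold]
        yield train, dev, test


# For example, with items = list(range(6)) and k = 3, the folds are [0, 3], [1, 4], [2, 5],
# so the first split yields train=[2, 5], dev=[0, 3], test=[1, 4].
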
def train_test(train_passages, dev_passages, test_passages, args, model_suffix=""):
    """
    Train and test the parser on the given passages
    :param train_passages: passages to train on
    :param dev_passages: passages to evaluate on every iteration
    :param test_passages: passages to test on after training
    :param args: command-line arguments
    :param model_suffix: string to append to the model filename before the file extension
    :return: generator of Scores objects: dev scores for each training iteration (if dev is given), and finally test scores
    """
    model_files = [base + model_suffix + ext
                   for base, ext in map(os.path.splitext, args.models or (args.classifier,))]
    p = Parser(model_files=model_files, config=Config(), beam=args.beam)
    yield from filter(None, p.train(train_passages, dev=dev_passages, test=test_passages, iterations=args.iterations))
    if test_passages:
        if args.train or args.folds:
            print("Evaluating on test passages")
        passage_scores = []
        evaluate = args.evaluate or train_passages
        for result in p.parse(test_passages, evaluate=evaluate, write=args.write):
            _, *score = result
            passage_scores += score
        if passage_scores:
            scores = Scores(passage_scores)
            if args.verbose <= 1 or len(passage_scores) > 1:
                print("\nAverage %s F1 score on test: %.3f" % (get_eval_type(scores), average_f1(scores)))
                print("Aggregated scores:")
                scores.print()
            print_scores(scores, args.testscores)
            yield scores
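
# A minimal usage sketch, assuming main_generator() is driven from a command-line entry
# point; the _main_example name and the consumption of the yielded Scores objects below
# are assumptions for illustration, not necessarily the project's actual entry point.
def _main_example():
    # Exhaust the generator: intermediate items are dev scores per training iteration,
    # and the last yielded Scores object holds the final test (or cross-validation) results.
    all_scores = list(main_generator())
    if all_scores:
        print("Final average F1: %.3f" % average_f1(all_scores[-1]))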