def run_generate(): parser = argparse.ArgumentParser( epilog= "Unspecified args like --num_beams=2 --decoder_start_token_id=4 are passed to model.generate" ) parser.add_argument("--data_dir", type=str, help="like cnn_dm/test.source") parser.add_argument( "--model_name", type=str, help="like facebook/bart-large-cnn,t5-base, etc.", default="sshleifer/distilbart-xsum-12-3", ) parser.add_argument("--save_dir", type=str, help="where to save", default="tmp_gen") parser.add_argument("--max_source_length", type=int, default=None) parser.add_argument( "--type_path", type=str, default="test", help="which subset to evaluate typically train/val/test") parser.add_argument("--reference_path", type=str, required=False, help="like cnn_dm/test.target") parser.add_argument("--task", type=str, default="summarization", help="used for task_specific_params + metrics") parser.add_argument("--bs", type=int, default=8, required=False, help="batch size") parser.add_argument("--local_rank", type=int, default=-1, required=False, help="should be passed by distributed.launch") parser.add_argument("--n_obs", type=int, default=None, required=False, help="How many observations. Defaults to all.") parser.add_argument( "--sync_timeout", type=int, default=600, required=False, help= "How long should master process wait for other processes to finish.", ) parser.add_argument("--fp16", action="store_true") parser.add_argument("--debug", action="store_true") start_time = time.time() args, rest = parser.parse_known_args() generate_kwargs = parse_numeric_n_bool_cl_kwargs(rest) if generate_kwargs and args.local_rank <= 0: print(f"parsed the following generate kwargs: {generate_kwargs}") json_save_dir = Path(args.save_dir + "_tmp") Path(json_save_dir).mkdir(exist_ok=True) # this handles locking. intermediate_files = list(json_save_dir.glob("rank_*.json")) if intermediate_files: raise ValueError( f"Found files at {json_save_dir} please move or remove them.") # In theory, a node could finish and save before another node hits this. If this happens, we can address later. Path(args.save_dir).mkdir(exist_ok=True) results, num_replicas = eval_data_dir( args.data_dir, json_save_dir, args.model_name, type_path=args.type_path, bs=args.bs, fp16=args.fp16, task=args.task, local_rank=args.local_rank, n_obs=args.n_obs, max_source_length=args.max_source_length, **generate_kwargs, ) if args.local_rank <= 0: save_dir = Path(args.save_dir) save_dir.mkdir(exist_ok=True) partial_results = gather_results_from_each_node( num_replicas, json_save_dir, args.sync_timeout) preds = combine_partial_results(partial_results) tgt_file = Path(args.data_dir).joinpath(args.type_path + ".target") labels = [x.rstrip() for x in open(tgt_file).readlines()][:len(preds)] # Calculate metrics, save metrics, and save _generations.txt calc_bleu = "translation" in args.task score_fn = calculate_bleu if calc_bleu else calculate_rouge metric_name = "bleu" if calc_bleu else "rouge" metrics: Dict = score_fn(preds, labels) metrics["n_obs"] = len(preds) runtime = time.time() - start_time metrics["seconds_per_sample"] = round(runtime / metrics["n_obs"], 4) metrics["n_gpus"] = num_replicas # TODO(@stas00): add whatever metadata to metrics metrics_save_path = save_dir.joinpath( f"{args.type_path}_{metric_name}.json") save_json(metrics, metrics_save_path, indent=None) print(metrics) write_txt_file(preds, save_dir.joinpath(f"{args.type_path}_generations.txt")) if args.debug: write_txt_file(labels, save_dir.joinpath(f"{args.type_path}.target")) else: shutil.rmtree(json_save_dir)
def run_generate(verbose=True): """ Takes input text, generates output, and then using reference calculates the BLEU scores. The results are saved to a file and returned to the caller, and printed out unless ``verbose=False`` is passed. Args: verbose (:obj:`bool`, `optional`, defaults to :obj:`True`): print results to stdout Returns: a tuple: ``(scores, params}`` - ``scores``: a dict of scores data ``{'bleu': 39.6501, 'n_obs': 2000, 'runtime': 186, 'seconds_per_sample': 0.093}`` - ``params``: a dict of custom params, e.g. ``{'num_beams': 5, 'length_penalty': 0.8}`` """ parser = argparse.ArgumentParser() parser.add_argument("model_name", type=str, help="like facebook/bart-large-cnn,t5-base, etc.") parser.add_argument("input_path", type=str, help="like cnn_dm/test.source") parser.add_argument("save_path", type=str, help="where to save summaries") parser.add_argument("--reference_path", type=str, required=False, help="like cnn_dm/test.target") parser.add_argument("--score_path", type=str, required=False, default="metrics.json", help="where to save metrics") parser.add_argument("--device", type=str, required=False, default=DEFAULT_DEVICE, help="cuda, cuda:1, cpu etc.") parser.add_argument("--prefix", type=str, required=False, default=None, help="will be added to the begininng of src examples") parser.add_argument("--task", type=str, default="summarization", help="used for task_specific_params + metrics") parser.add_argument("--bs", type=int, default=8, required=False, help="batch size") parser.add_argument("--n_obs", type=int, default=-1, required=False, help="How many observations. Defaults to all.") parser.add_argument("--max_length", type=int, default=512, required=False, help="batch size") parser.add_argument("--fp16", action="store_true") parser.add_argument("--vocab_file", type=str, default="", required=False) parser.add_argument("--tokenizer_name", type=str, default="", required=False) parser.add_argument("--dump-args", action="store_true", help="print the custom hparams with the results") parser.add_argument( "--info", nargs="?", type=str, const=datetime_now(), help= "use in conjunction w/ --dump-args to print with the results whatever other info you'd like, e.g. lang=en-ru. If no value is passed, the current datetime string will be used.", ) # Unspecified args like --num_beams=2 --decoder_start_token_id=4 are passed to model.generate args, rest = parser.parse_known_args() parsed_args = parse_numeric_n_bool_cl_kwargs(rest) if parsed_args and verbose: print(f"parsed the following generate kwargs: {parsed_args}") examples = [ " " + x.rstrip() if "t5" in args.model_name else x.rstrip() for x in open(args.input_path).readlines() ] if args.n_obs > 0: examples = examples[:args.n_obs] Path(args.save_path).parent.mkdir(exist_ok=True) if args.reference_path is None and Path(args.score_path).exists(): warnings.warn( f"score_path {args.score_path} will be overwritten unless you type ctrl-c." ) runtime_metrics = generate_summaries_or_translations( examples, args.save_path, args.model_name, batch_size=args.bs, device=args.device, fp16=args.fp16, task=args.task, prefix=args.prefix, tokenizer_name=args.tokenizer_name, vocab_file=args.vocab_file, max_length=args.max_length, **parsed_args, ) if args.reference_path is None: return {} # Compute scores score_fn = calculate_bleu if "translation" in args.task else calculate_rouge output_lns = [x.rstrip() for x in open(args.save_path).readlines()] reference_lns = [ x.rstrip() for x in open(args.reference_path).readlines() ][:len(output_lns)] scores: dict = score_fn(output_lns, reference_lns) scores.update(runtime_metrics) if args.dump_args: scores.update(parsed_args) if args.info: scores["info"] = args.info if verbose: print(scores) if args.score_path is not None: json.dump(scores, open(args.score_path, "w")) return scores