def load_rescore_models(self, args):
    """Load rescoring models."""
    model_paths = [
        ("l2r_model", args.l2r_model_path),
        ("r2l_model", args.r2l_model_path),
        ("reverse_model", args.reverse_model_path),
        ("lm_model", args.lm_model_path),
        ("cloze_model", args.cloze_transformer_path),
    ]
    models = {}
    for name, path in model_paths:
        if not path:
            continue
        model, _, task = pytorch_translate_utils.load_diverse_ensemble_for_inference(
            [path]
        )
        models[name] = {"model": model[0], "task": task}
    return models
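# Hypothetical usage sketch (the helper name and calling context below are
# assumptions, not from the original source): every entry returned by
# load_rescore_models has the same {"model": ..., "task": ...} shape, so
# downstream code can treat all rescoring models uniformly.
def eval_rescore_models(self, args):
    models = self.load_rescore_models(args)
    for name, entry in models.items():
        entry["model"].eval()  # rescoring is inference-only
    return models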
def generate(args):
    pytorch_translate_options.print_args(args)

    # Setup task
    task = tasks.setup_task(args)

    models, model_args = pytorch_translate_utils.load_diverse_ensemble_for_inference(
        args.path.split(":"), task
    )
    args.source_lang = model_args[0].source_lang
    args.target_lang = model_args[0].target_lang

    append_eos_to_source = model_args[0].append_eos_to_source
    reverse_source = model_args[0].reverse_source
    assert all(
        a.append_eos_to_source == append_eos_to_source
        and a.reverse_source == reverse_source
        for a in model_args
    )
    if args.source_binary_file != "":
        assert args.target_binary_file != ""
        task.load_dataset(
            args.gen_subset, args.source_binary_file, args.target_binary_file
        )
    elif pytorch_translate_data.is_multilingual(args):
        task.set_encoder_langs(model_args[0].multiling_encoder_lang)
        task.set_decoder_langs(model_args[0].multiling_decoder_lang)
        task.load_dataset_from_text_multilingual(
            args.gen_subset,
            source_text_file=args.source_text_file[0],
            target_text_file=args.target_text_file,
            source_lang_id=task.get_encoder_lang_id(args.multiling_source_lang[0]),
            target_lang_id=task.get_decoder_lang_id(args.multiling_target_lang[0]),
            append_eos=append_eos_to_source,
            reverse_source=reverse_source,
        )
    elif args.source_ensembling:
        task.load_multisource_dataset_from_text(
            args.gen_subset,
            source_text_files=args.source_text_file,
            target_text_file=args.target_text_file,
            append_eos=append_eos_to_source,
            reverse_source=reverse_source,
        )
    else:
        task.load_dataset_from_text(
            args.gen_subset,
            source_text_file=args.source_text_file[0],
            target_text_file=args.target_text_file,
            append_eos=append_eos_to_source,
            reverse_source=reverse_source,
        )

    scorer, num_sentences, gen_timer, _ = _generate_score(
        models=models, args=args, task=task, dataset=task.dataset(args.gen_subset)
    )
    print(
        f"| Translated {num_sentences} sentences ({gen_timer.n} tokens) "
        f"in {gen_timer.sum:.1f}s ({1. / gen_timer.avg:.2f} tokens/s)"
    )
    print(
        f"| Generate {args.gen_subset} with beam={args.beam}: "
        f"{scorer.result_string()}"
    )
    return scorer.score()
def __init__(self, args, task): """ This code is for word-level knowledge distillation. Most of the algorithm is inspired from the Kim and Rush (2016) paper: http://www.aclweb.org/anthology/D16-1139 """ super().__init__(args, task) assert (args.teacher_path ), "Please specify at least one valid file for --teacher-path" use_cuda = torch.cuda.is_available() and not self.args.cpu # Load model ensemble from checkpoints self.teacher_models, self.teacher_model_args, _ = pytorch_translate_utils.load_diverse_ensemble_for_inference( args.teacher_path.split(":"), task) # Move models to device and to evaluation mode if use_cuda: for model in self.teacher_models: model.cuda() for model in self.teacher_models: model.make_generation_fast_(beamable_mm_beam_size=None if args. no_beamable_mm else args.beam) self.kd_weight = getattr(args, "kd_weight", 0) if self.kd_weight < 0 or self.kd_weight > 1: raise ValueError( f"--kd-weight ({self.kd_weight}) must be in [0, 1]") self.top_k_teacher_tokens = getattr(args, "top_k_teacher_tokens", 8)
def __init__(self, args, task):
    super().__init__(args, task)
    assert (
        args.teacher_path
    ), "Please specify at least one valid file for --teacher-path"
    use_cuda = torch.cuda.is_available() and not self.args.cpu

    # Load model ensemble from checkpoints
    (
        self.teacher_models,
        self.teacher_model_args,
    ) = pytorch_translate_utils.load_diverse_ensemble_for_inference(
        [args.teacher_path], task
    )

    # Move models to device and prepare them for fast generation
    if use_cuda:
        for model in self.teacher_models:
            model.cuda()
    for model in self.teacher_models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam
        )

    self.kd_weight = getattr(args, "kd_weight", 0)
    if self.kd_weight < 0 or self.kd_weight > 1:
        raise ValueError(f"--kd-weight ({self.kd_weight}) must be in [0, 1]")
def save_top_k(args):
    """
    This function runs forward computation on an ensemble of trained models
    using binarized parallel training data and saves the top-k probabilities
    and their corresponding token indices for each output step.

    Note that the Python binary accepts all generation params, but ignores
    inapplicable ones (such as those related to output length). --max-tokens
    is of particular importance to prevent memory errors.
    """
    pytorch_translate_options.print_args(args)
    use_cuda = torch.cuda.is_available() and not getattr(args, "cpu", False)

    (
        models,
        model_args,
        task,
    ) = pytorch_translate_utils.load_diverse_ensemble_for_inference(
        args.path.split(CHECKPOINT_PATHS_DELIMITER)
    )
    for model in models:
        model.eval()
        if use_cuda:
            model.cuda()

    append_eos_to_source = model_args[0].append_eos_to_source
    reverse_source = model_args[0].reverse_source
    assert all(
        a.append_eos_to_source == append_eos_to_source
        and a.reverse_source == reverse_source
        for a in model_args
    )

    assert (
        args.source_binary_file != "" and args.target_binary_file != ""
    ), "collect_top_k_probs requires binarized data."
    task.load_dataset(
        args.gen_subset, args.source_binary_file, args.target_binary_file
    )

    assert (
        args.top_k_probs_binary_file != ""
    ), "must specify output file (--top-k-probs-binary-file)!"
    output_path = args.top_k_probs_binary_file

    dataset = task.dataset(args.gen_subset)
    top_k_scores, top_k_indices = compute_top_k(
        task=task,
        models=models,
        dataset=dataset,
        k=args.k_probs_to_collect,
        use_cuda=use_cuda,
        max_tokens=args.teacher_max_tokens,
        max_sentences=args.max_sentences,
        progress_bar_args=args,
    )

    np.savez(output_path, top_k_scores=top_k_scores, top_k_indices=top_k_indices)
    print(
        f"Saved top {top_k_scores.shape[1]} probs for a total of "
        f"{top_k_scores.shape[0]} tokens to file {output_path}"
    )
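# Sketch of consuming the file written above (assuming only the np.savez call
# shown in save_top_k): np.savez stores the two arrays under the
# "top_k_scores" / "top_k_indices" keys, so a reader can recover, for each
# target token position, the k best vocabulary indices and their scores.
import numpy as np

def load_top_k(path):
    data = np.load(path)
    top_k_scores = data["top_k_scores"]    # shape: (num_tokens, k)
    top_k_indices = data["top_k_indices"]  # shape: (num_tokens, k)
    return top_k_scores, top_k_indices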
def __init__(self, args, model_path=None, model=None, forward_task=None):
    """
    Initialize a rescorer model.

    Args:
        args: model arguments
        model_path: checkpoint path for the rescoring model
        model: optional pre-loaded model, a dict with "model" and "task" keys
        forward_task: task of the forward model whose hypotheses are rescored
    """
    # TODO (T40938917): Allow loading of multiple rescoring models

    # Allow creating an empty scorer without a model.
    self.args = args
    self.forward_task = forward_task
    self.task = None
    self.model = None

    # Instantiate the model
    if model is not None:
        self.model = model["model"]
        self.task = model["task"]
    elif model_path:
        rescoring_model, _, task = utils.load_diverse_ensemble_for_inference(
            [model_path]
        )
        self.model = rescoring_model[0]
        self.task = task

    if self.model is not None:
        self.model.eval()
        # Turn off gradient computation in eval mode
        for param in self.model.parameters():
            param.requires_grad = False
        utils.maybe_cuda(self.model)
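# Hypothetical sketch (the helper below and the pairing with
# load_rescore_models are assumptions, not from the original source): the
# `model` argument accepts the same {"model": ..., "task": ...} dicts that
# load_rescore_models produces, so one scorer can be built per pre-loaded
# rescoring model without re-reading checkpoints from disk.
def build_rescorers(args, models):
    return {name: Rescorer(args, model=entry) for name, entry in models.items()}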
def __init__(self, args, src_dict, tgt_dict, char_source_dict=None):
    super().__init__(
        args,
        src_dict=src_dict,
        tgt_dict=tgt_dict,
        char_source_dict=char_source_dict,
    )
    self.top_k_probs_binary_file = args.top_k_probs_binary_file
    self.top_k_teacher_tokens = args.top_k_teacher_tokens

    if self.top_k_probs_binary_file is None:
        # Load model ensemble from checkpoints
        (
            self.teacher_models,
            _,
            _,
        ) = pytorch_translate_utils.load_diverse_ensemble_for_inference(
            args.teacher_path.split(":")
        )
        if torch.cuda.is_available():
            # Rebind the list entries: maybe_cuda returns the moved model, so
            # assigning to the loop variable alone would not update the list.
            self.teacher_models = [
                pytorch_translate_utils.maybe_cuda(model)
                for model in self.teacher_models
            ]
    else:
        self.teacher_models = None

    # Memoized scores for teacher models. By memoizing the values gradually,
    # we avoid recomputing teacher scores for samples we have seen before.
    self.top_k_teacher_scores: Dict[int, np.ndarray] = {}
    self.top_k_teacher_indices: Dict[int, np.ndarray] = {}
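# A minimal sketch (an assumed helper, not in the original source) of the
# memoization pattern the two dicts above enable: look up a sample id first
# and only run the teacher ensemble on a miss, so each sample's top-k teacher
# scores are computed at most once across epochs.
def get_teacher_scores(self, sample_id, compute_fn):
    if sample_id not in self.top_k_teacher_scores:
        scores, indices = compute_fn(sample_id)
        self.top_k_teacher_scores[sample_id] = scores
        self.top_k_teacher_indices[sample_id] = indices
    return (
        self.top_k_teacher_scores[sample_id],
        self.top_k_teacher_indices[sample_id],
    )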
def setup_rescoring(args):
    if args.rescoring_strategy is None or args.rescoring_model_path is None:
        return None
    # TODO (T40938917): Allow loading of multiple rescoring models
    rescoring_model, _, _ = pytorch_translate_utils.load_diverse_ensemble_for_inference(
        [args.rescoring_model_path]
    )
    return rescoring_model[0]
def main():
    parser = argparse.ArgumentParser(
        description="Rescore generated hypotheses with extra models"
    )
    add_args(parser)
    add_args_rescore(parser)
    args = parser.parse_args()

    assert (
        args.translation_info_export_path is not None
    ), "--translation_info_export_path is required for rescoring"
    assert args.l2r_model_path is not None, "Rescoring needs forward model"

    _, _, forward_task = utils.load_diverse_ensemble_for_inference(
        [args.l2r_model_path]
    )
    rescorer = Rescorer(args, forward_task)
    dst_dict = forward_task.tgt_dict
    base_bleu_scorer = bleu.Scorer(
        bleu.BleuConfig(pad=dst_dict.pad(), eos=dst_dict.eos(), unk=dst_dict.unk())
    )
    rescoring_bleu_scorer = bleu.Scorer(
        bleu.BleuConfig(pad=dst_dict.pad(), eos=dst_dict.eos(), unk=dst_dict.unk())
    )

    with open(args.translation_info_export_path, "rb") as file:
        translation_info_list = pickle.load(file)

    scores_to_export_list = []
    for k in tqdm(range(0, len(translation_info_list), args.batch_size)):
        trans_batch_info = translation_info_list[k : k + args.batch_size]
        for j in range(len(trans_batch_info)):
            trans_batch_info[j]["hypos"] = [
                {"score": hypo["score"], "tokens": hypo["tokens"].cuda()}
                for hypo in trans_batch_info[j]["hypos"]
            ]

        top_tokens, scores_to_export = find_top_tokens(
            args, trans_batch_info, rescorer, dst_dict.pad()
        )
        if args.scores_info_export_path is not None:
            scores_to_export_list += scores_to_export

        for i, trans_info in enumerate(trans_batch_info):
            base_bleu_scorer.add(
                trans_info["target_tokens"].int().cpu(),
                trans_info["hypos"][0]["tokens"].int().cpu(),
            )
            rescoring_bleu_scorer.add(
                trans_info["target_tokens"].int().cpu(),
                top_tokens[i].int().cpu(),
            )

    print("| Base ", base_bleu_scorer.result_string())
    print("| Rescoring ", rescoring_bleu_scorer.result_string())

    if args.scores_info_export_path is not None:
        with open(args.scores_info_export_path, "wb") as file:
            pickle.dump(scores_to_export_list, file)
def __init__(self, args, model_path):
    self.args = args

    # TODO (T40938917): Allow loading of multiple rescoring models
    (
        rescoring_model,
        _,
        rescoring_task,
    ) = utils.load_diverse_ensemble_for_inference([model_path])
    self.task = rescoring_task
    self.model = rescoring_model[0]
    self.model.eval()

    if not self.args.cpu:
        utils.maybe_cuda(self.model)
def generate(args): assert_test_corpus_and_vocab_files_specified(args) assert args.path is not None, "--path required for generation!" print(args) src_dict = pytorch_translate_dictionary.Dictionary.load(args.source_vocab_file) dst_dict = pytorch_translate_dictionary.Dictionary.load(args.target_vocab_file) dataset = data.LanguageDatasets( src=args.source_lang, dst=args.target_lang, src_dict=src_dict, dst_dict=dst_dict ) models, model_args = pytorch_translate_utils.load_diverse_ensemble_for_inference( args.path, dataset.src_dict, dataset.dst_dict ) append_eos_to_source = model_args[0].append_eos_to_source reverse_source = model_args[0].reverse_source assert all( a.append_eos_to_source == append_eos_to_source and a.reverse_source == reverse_source for a in model_args ) dataset.splits[args.gen_subset] = pytorch_translate_data.make_language_pair_dataset_from_text( source_text_file=args.source_text_file, target_text_file=args.target_text_file, source_dict=src_dict, target_dict=dst_dict, append_eos=append_eos_to_source, reverse_source=reverse_source, ) if args.source_lang is None or args.target_lang is None: # record inferred languages in args args.source_lang, args.target_lang = dataset.src, dataset.dst print(f"| [{dataset.src}] dictionary: {len(dataset.src_dict)} types") print(f"| [{dataset.dst}] dictionary: {len(dataset.dst_dict)} types") print(f"| {args.gen_subset} {len(dataset.splits[args.gen_subset])} examples") scorer, num_sentences, gen_timer = _generate_score( models=models, args=args, dataset=dataset, dataset_split=args.gen_subset ) print( f"| Translated {num_sentences} sentences ({gen_timer.n} tokens) " f"in {gen_timer.sum:.1f}s ({1. / gen_timer.avg:.2f} tokens/s)" ) print( f"| Generate {args.gen_subset} with beam={args.beam}: " f"{scorer.result_string()}" ) return scorer.score()
def main():
    args = get_arg_parser().parse_args()

    assert (
        args.translation_info_export_path is not None
    ), "--translation_info_export_path is required for rescoring"
    assert args.l2r_model_path is not None, "Rescoring needs forward model"

    _, _, forward_task = utils.load_diverse_ensemble_for_inference(
        [args.l2r_model_path]
    )
    rescorer = Rescorer(args, forward_task)
    dst_dict = forward_task.tgt_dict
    base_bleu_scorer = bleu.Scorer(dst_dict.pad(), dst_dict.eos(), dst_dict.unk())
    rescoring_bleu_scorer = bleu.Scorer(
        dst_dict.pad(), dst_dict.eos(), dst_dict.unk()
    )

    with open(args.translation_info_export_path, "rb") as f:
        translation_info_list = pickle.load(f)

    scores_to_export_list = []
    for trans_info in tqdm(translation_info_list):
        trans_info["hypos"] = [
            {"score": hypo["score"], "tokens": hypo["tokens"].cuda()}
            for hypo in trans_info["hypos"]
        ]

        base_bleu_scorer.add(
            trans_info["target_tokens"].int().cpu(),
            trans_info["hypos"][0]["tokens"].int().cpu(),
        )

        top_tokens, scores_to_export = find_top_tokens(args, trans_info, rescorer)
        if args.scores_info_export_path is not None:
            scores_to_export_list.append(scores_to_export)

        rescoring_bleu_scorer.add(
            trans_info["target_tokens"].int().cpu(), top_tokens.int().cpu()
        )

    print("| Base ", base_bleu_scorer.result_string())
    print("| Rescoring ", rescoring_bleu_scorer.result_string())

    if args.scores_info_export_path is not None:
        with open(args.scores_info_export_path, "wb") as f:
            pickle.dump(scores_to_export_list, f)
def __init__(self, args, model_path): """ Initialize a rescorer model Args: args: model arguments model_path: checkpoint path for rescoring model """ self.args = args # TODO (T40938917): Allow loading of multiple rescoring models ( rescoring_model, rescoring_model_arg, rescoring_task, ) = utils.load_diverse_ensemble_for_inference([model_path]) self.task = rescoring_task # e.g p(y), p(x|y) etc. self.model = rescoring_model[0] self.model.eval() utils.maybe_cuda(self.model)
def generate(args): assert_test_corpus_and_vocab_files_specified(args) assert args.path is not None, "--path required for generation!" print(args) # Benchmarking should be language-agnostic args.source_lang = "src" args.target_lang = "tgt" src_dict = pytorch_translate_dictionary.Dictionary.load( args.source_vocab_file) dst_dict = pytorch_translate_dictionary.Dictionary.load( args.target_vocab_file) # Generate synthetic raw text files source_text_file = generate_synthetic_text(args.source_lang, src_dict.symbols, args) target_text_file = generate_synthetic_text(args.target_lang, src_dict.symbols, args) dataset = data.LanguageDatasets(src=args.source_lang, dst=args.target_lang, src_dict=src_dict, dst_dict=dst_dict) models, model_args = pytorch_translate_utils.load_diverse_ensemble_for_inference( args.path, dataset.src_dict, dataset.dst_dict) append_eos_to_source = model_args[0].append_eos_to_source reverse_source = model_args[0].reverse_source assert all(a.append_eos_to_source == append_eos_to_source and a.reverse_source == reverse_source for a in model_args) dataset.splits[ args. gen_subset] = pytorch_translate_data.make_language_pair_dataset_from_text( source_text_file=source_text_file, target_text_file=target_text_file, source_dict=src_dict, target_dict=dst_dict, append_eos=append_eos_to_source, reverse_source=reverse_source, ) # Remove temporary text files os.remove(source_text_file) os.remove(target_text_file) if args.source_lang is None or args.target_lang is None: # record inferred languages in args args.source_lang, args.target_lang = dataset.src, dataset.dst print(f"| [{dataset.src}] dictionary: {len(dataset.src_dict)} types") print(f"| [{dataset.dst}] dictionary: {len(dataset.dst_dict)} types") print( f"| {args.gen_subset} {len(dataset.splits[args.gen_subset])} examples") args.keep_detailed_timing = True scorer, num_sentences, gen_timer = pytorch_translate_generate._generate_score( models=models, args=args, dataset=dataset, dataset_split=args.gen_subset) # Remove contribution of primer sentence gen_timer.reset_bucket(0) print( f"| Translated {num_sentences} sentences ({sum(gen_timer.n)} tokens) " f"in {sum(gen_timer.sum):.3f}s ({1. / gen_timer.avg:.2f} tokens/s)") for bucket_id in range(gen_timer.n_buckets): if gen_timer.n[bucket_id] != 0: print( " | Length {}: {} sentences ({} tok) in {:.3f}s ({:.3f} tok/s, avg. latency {:4f}s)" .format( bucket_id * args.increment, gen_timer.count[bucket_id], gen_timer.n[bucket_id], gen_timer.sum[bucket_id], 1. / gen_timer.avgs[bucket_id], gen_timer.sum[bucket_id] / gen_timer.count[bucket_id], )) print(f"| Generate {args.gen_subset} with beam={args.beam}: " f"{scorer.result_string()}") return scorer.score()
def setup_attack(args):
    """Load model and data, and create the AdversarialTrainer object."""
    # Setup task
    task = tasks.setup_task(args)

    # Load model
    models, models_args = pytorch_translate_utils.load_diverse_ensemble_for_inference(
        args.path.split(":"), task
    )

    # Only one model is supported as of now
    model, model_args = models[0], models_args[0]

    # Languages
    args.source_lang = model_args.source_lang
    args.target_lang = model_args.target_lang

    # Keep track of whether we reverse the source or not
    # (this is important to save the adversarial inputs in the correct order)
    args.reverse_source = model_args.reverse_source

    # Load dataset
    task.load_dataset_from_text(
        args.gen_subset,
        source_text_file=args.source_text_file,
        target_text_file=args.target_text_file,
        append_eos=model_args.append_eos_to_source,
        reverse_source=model_args.reverse_source,
    )

    # Create adversarial criterion
    adv_criterion = task.build_adversarial_criterion(args)

    # Adversary
    adversary = adversaries.build_adversary(args, model, task)

    # Print a bit of info
    print(
        f"| model {model_args.arch}, "
        f"adversarial criterion {adv_criterion.__class__.__name__}, "
        f"adversary {adversary.__class__.__name__}"
    )

    # Build trainer
    adv_trainer = adversarial_trainer.AdversarialTrainer(
        args=args,
        task=task,
        model=model,
        criterion=None,
        adversarial_criterion=adv_criterion,
        adversary=adversary,
    )

    # Device info. For now only 1 GPU is supported.
    distributed_world_size = getattr(args, "distributed_world_size", 1)
    print(f"| Attacking on {distributed_world_size} GPU(s)")
    print(
        f"| max tokens per GPU = {args.max_tokens} and "
        f"max sentences per GPU = {args.max_sentences}",
        flush=True,
    )

    return adv_trainer, task
def generate(args):
    pytorch_translate_options.print_args(args)

    src_dict = pytorch_translate_dictionary.Dictionary.load(args.source_vocab_file)
    dst_dict = pytorch_translate_dictionary.Dictionary.load(args.target_vocab_file)
    use_char_source = args.char_source_vocab_file != ""
    if use_char_source:
        char_source_dict = pytorch_translate_dictionary.Dictionary.load(
            args.char_source_vocab_file
        )
        # This attribute is used for CharSourceModel construction
        args.char_source_dict_size = len(char_source_dict)
    else:
        char_source_dict = None

    dataset = data.LanguageDatasets(
        src=args.source_lang,
        dst=args.target_lang,
        src_dict=src_dict,
        dst_dict=dst_dict,
    )
    models, model_args = pytorch_translate_utils.load_diverse_ensemble_for_inference(
        args.path, dataset.src_dict, dataset.dst_dict
    )
    append_eos_to_source = model_args[0].append_eos_to_source
    reverse_source = model_args[0].reverse_source
    assert all(
        a.append_eos_to_source == append_eos_to_source
        and a.reverse_source == reverse_source
        for a in model_args
    )
    if args.source_binary_file != "":
        assert args.target_binary_file != ""
        dst_dataset = pytorch_translate_data.InMemoryNumpyDataset.create_from_file(
            args.target_binary_file
        )
        if use_char_source:
            src_dataset = char_data.InMemoryNumpyWordCharDataset.create_from_file(
                args.source_binary_file
            )
            gen_split = char_data.LanguagePairSourceCharDataset(
                src=src_dataset,
                dst=dst_dataset,
                pad_idx=src_dict.pad(),
                eos_idx=dst_dict.eos(),
            )
        else:
            src_dataset = pytorch_translate_data.InMemoryNumpyDataset.create_from_file(
                args.source_binary_file
            )
            gen_split = data.LanguagePairDataset(
                src=src_dataset,
                dst=dst_dataset,
                pad_idx=src_dict.pad(),
                eos_idx=dst_dict.eos(),
            )
    elif pytorch_translate_data.is_multilingual(args):
        gen_split = pytorch_translate_data.make_language_pair_dataset_from_text_multilingual(
            source_text_file=args.source_text_file[0],
            target_text_file=args.target_text_file,
            source_lang_id=args.multiling_source_lang_id,
            target_lang_id=args.multiling_target_lang_id,
            source_dict=src_dict,
            target_dict=dst_dict,
            append_eos=append_eos_to_source,
            reverse_source=reverse_source,
        )
    elif args.source_ensembling:
        gen_split = multisource_data.make_multisource_language_pair_dataset_from_text(
            source_text_files=args.source_text_file,
            target_text_file=args.target_text_file,
            source_dict=src_dict,
            target_dict=dst_dict,
            append_eos=append_eos_to_source,
            reverse_source=reverse_source,
        )
    else:
        gen_split = pytorch_translate_data.make_language_pair_dataset_from_text(
            source_text_file=args.source_text_file[0],
            target_text_file=args.target_text_file,
            source_dict=src_dict,
            target_dict=dst_dict,
            append_eos=append_eos_to_source,
            reverse_source=reverse_source,
            char_source_dict=char_source_dict,
        )
    dataset.splits[args.gen_subset] = gen_split

    if args.source_lang is None or args.target_lang is None:
        # Record inferred languages in args
        args.source_lang, args.target_lang = dataset.src, dataset.dst

    print(f"| [{dataset.src}] dictionary: {len(dataset.src_dict)} types")
    print(f"| [{dataset.dst}] dictionary: {len(dataset.dst_dict)} types")
    print(f"| {args.gen_subset} {len(dataset.splits[args.gen_subset])} examples")
    scorer, num_sentences, gen_timer, _ = _generate_score(
        models=models, args=args, dataset=dataset, dataset_split=args.gen_subset
    )
    print(
        f"| Translated {num_sentences} sentences ({gen_timer.n} tokens) "
        f"in {gen_timer.sum:.1f}s ({1. / gen_timer.avg:.2f} tokens/s)"
    )
    print(
        f"| Generate {args.gen_subset} with beam={args.beam}: "
        f"{scorer.result_string()}"
    )
    return scorer.score()
def generate(args):
    pytorch_translate_options.print_args(args)

    models, model_args, task = pytorch_translate_utils.load_diverse_ensemble_for_inference(
        args.path.split(CHECKPOINT_PATHS_DELIMITER)
    )
    args.source_lang = model_args[0].source_lang
    args.target_lang = model_args[0].target_lang

    append_eos_to_source = model_args[0].append_eos_to_source
    reverse_source = model_args[0].reverse_source
    assert all(
        a.append_eos_to_source == append_eos_to_source
        and a.reverse_source == reverse_source
        for a in model_args
    )
    if args.source_binary_file != "":
        assert args.target_binary_file != ""
        if isinstance(task, PytorchTranslateTask):
            task.load_dataset(
                args.gen_subset,
                args.source_binary_file,
                args.target_binary_file,
                is_npz=args.is_npz,
            )
        else:
            task.load_dataset(
                args.gen_subset, args.source_binary_file, args.target_binary_file
            )
    elif pytorch_translate_data.is_multilingual_many_to_one(args):
        task.set_encoder_langs(model_args[0].multiling_encoder_lang)
        task.set_decoder_langs(model_args[0].multiling_decoder_lang)
        task.load_dataset_from_text_multilingual(
            args.gen_subset,
            source_text_file=args.source_text_file[0],
            target_text_file=args.target_text_file,
            source_lang_id=task.get_encoder_lang_id(args.multiling_source_lang[0]),
            target_lang_id=task.get_decoder_lang_id(args.multiling_target_lang[0]),
            append_eos=append_eos_to_source,
            reverse_source=reverse_source,
        )
    elif args.source_ensembling:
        task.load_multisource_dataset_from_text(
            args.gen_subset,
            source_text_files=args.source_text_file,
            target_text_file=args.target_text_file,
            append_eos=append_eos_to_source,
            reverse_source=reverse_source,
        )
    else:
        task.load_dataset_from_text(
            args.gen_subset,
            source_text_file=args.source_text_file[0],
            target_text_file=args.target_text_file,
            append_eos=append_eos_to_source,
            reverse_source=reverse_source,
        )

    lang_pair = None
    if isinstance(task, PyTorchTranslateMultiTask):
        if args.source_lang and args.target_lang:
            lang_pair = args.source_lang + "-" + args.target_lang
        else:
            lang_pair = "src-tgt"

    scorer, num_sentences, gen_timer, _ = generate_score(
        args=args,
        task=task,
        dataset=task.dataset(args.gen_subset),
        lang_pair=lang_pair,
        models=models,
    )
    print(
        f"| Translated {num_sentences} sentences ({gen_timer.n} tokens) "
        f"in {gen_timer.sum:.1f}s ({1. / gen_timer.avg:.2f} tokens/s)"
    )
    print(
        f"| Generate {args.gen_subset} with beam={args.beam}: "
        f"{scorer.result_string()}"
    )
    return scorer.score()
def generate(args): assert_test_corpus_and_vocab_files_specified(args) assert args.path is not None, "--path required for generation!" print(args) # Benchmarking should be language-agnostic args.source_lang = "src" args.target_lang = "tgt" task = tasks.setup_task(args) models, model_args = pytorch_translate_utils.load_diverse_ensemble_for_inference( args.path.split(':'), task) # Generate synthetic raw text files source_text_file = generate_synthetic_text(args.source_lang, task.source_dictionary.symbols, args) target_text_file = generate_synthetic_text(args.target_lang, task.target_dictionary.symbols, args) append_eos_to_source = model_args[0].append_eos_to_source reverse_source = model_args[0].reverse_source assert all(a.append_eos_to_source == append_eos_to_source and a.reverse_source == reverse_source for a in model_args) task.load_dataset_from_text( args.gen_subset, source_text_file=source_text_file, target_text_file=target_text_file, append_eos=append_eos_to_source, reverse_source=reverse_source, ) # Remove temporary text files os.remove(source_text_file) os.remove(target_text_file) args.keep_detailed_timing = True scorer, num_sentences, gen_timer, _ = pytorch_translate_generate._generate_score( models=models, args=args, task=task, dataset_split=args.gen_subset) # Remove contribution of primer sentence gen_timer.reset_bucket(0) print( f"| Translated {num_sentences} sentences ({sum(gen_timer.n)} tokens) " f"in {sum(gen_timer.sum):.3f}s ({1. / gen_timer.avg:.2f} tokens/s)") for bucket_id in range(gen_timer.n_buckets): if gen_timer.n[bucket_id] != 0: print( " | Length {}: {} sentences ({} tok) in {:.3f}s ({:.3f} tok/s, avg. latency {:4f}s)" .format( bucket_id * args.increment, gen_timer.count[bucket_id], gen_timer.n[bucket_id], gen_timer.sum[bucket_id], 1. / gen_timer.avgs[bucket_id], gen_timer.sum[bucket_id] / gen_timer.count[bucket_id], )) print(f"| Generate {args.gen_subset} with beam={args.beam}: " f"{scorer.result_string()}") return scorer.score()
def benchmark(args):
    assert args.source_vocab_file and os.path.isfile(
        args.source_vocab_file
    ), "Please specify a valid file for --source-vocab-file"
    assert args.target_vocab_file and os.path.isfile(
        args.target_vocab_file
    ), "Please specify a valid file for --target-vocab-file"
    assert args.path is not None, "--path required for generation!"

    print(args)

    # Benchmarking should be language-agnostic
    args.source_lang = "src"
    args.target_lang = "tgt"

    models, model_args, task = pytorch_translate_utils.load_diverse_ensemble_for_inference(
        args.path.split(":")
    )
    append_eos_to_source = model_args[0].append_eos_to_source
    reverse_source = model_args[0].reverse_source
    assert all(
        a.append_eos_to_source == append_eos_to_source
        and a.reverse_source == reverse_source
        for a in model_args
    )

    def benchmark_length(n):
        # Generate synthetic raw text files
        source_text_file = generate_synthetic_text(
            dialect=args.source_lang,
            dialect_symbols=task.source_dictionary.symbols,
            length=n,
            examples=args.examples_per_length,
        )
        target_text_file = generate_synthetic_text(
            dialect=args.target_lang,
            dialect_symbols=task.target_dictionary.symbols,
            length=n,
            examples=args.examples_per_length,
        )
        task.load_dataset_from_text(
            args.gen_subset,
            source_text_file=source_text_file,
            target_text_file=target_text_file,
            append_eos=append_eos_to_source,
            reverse_source=reverse_source,
        )

        # Remove temporary text files
        os.remove(source_text_file)
        os.remove(target_text_file)

        # Priming run to warm up the pipeline before timing
        scorer, num_sentences, gen_timer, _ = pytorch_translate_generate.generate_score(
            models=models, args=args, task=task, dataset=task.dataset(args.gen_subset)
        )

        total_time = 0.0
        for _ in range(args.runs_per_length):
            scorer, num_sentences, gen_timer, _ = pytorch_translate_generate.generate_score(
                models=models,
                args=args,
                task=task,
                dataset=task.dataset(args.gen_subset),
            )
            total_time += gen_timer.sum
            gen_timer.reset()

        sentences_per_run = args.examples_per_length
        runs = args.runs_per_length
        total_sentences = sentences_per_run * runs
        total_tokens = total_sentences * n
        print(f"--- {n} tokens ---")
        print(f"Generated {total_tokens} tokens ({runs} runs of {sentences_per_run})")
        print(f"Total time: {total_time:.3f} seconds")
        time_per_sentence = total_time / total_sentences
        print(f"Time per sentence: {time_per_sentence:.3f} seconds\n")

    benchmark_length(6)
    benchmark_length(10)
    benchmark_length(20)