def benchmark_length(n):
    """Benchmark generation speed on synthetic sentences of length ``n``.

    Builds temporary synthetic source/target text files, loads them as the
    ``args.gen_subset`` dataset, runs one priming generation pass, then times
    ``args.runs_per_length`` generation runs and prints throughput stats.

    NOTE(review): relies on module-level ``args``, ``task``, ``models``,
    ``append_eos_to_source``, ``reverse_source``, ``generate_synthetic_text``
    and ``pytorch_translate_generate`` being in scope — confirm against the
    enclosing script.

    Args:
        n: Sentence length (tokens per synthetic example).
    """
    # Generate synthetic raw text files for both sides of the language pair.
    source_text_file = generate_synthetic_text(
        dialect=args.source_lang,
        dialect_symbols=task.source_dictionary.symbols,
        length=n,
        examples=args.examples_per_length,
    )
    target_text_file = generate_synthetic_text(
        dialect=args.target_lang,
        dialect_symbols=task.target_dictionary.symbols,
        length=n,
        examples=args.examples_per_length,
    )
    task.load_dataset_from_text(
        args.gen_subset,
        source_text_file=source_text_file,
        target_text_file=target_text_file,
        append_eos=append_eos_to_source,
        reverse_source=reverse_source,
    )
    # Remove temporary text files; the dataset is already loaded in memory.
    os.remove(source_text_file)
    os.remove(target_text_file)

    # Priming run: warm up (e.g. lazy initialization, caches) so the timed
    # runs below are not skewed. The returned values are deliberately unused.
    _, _, _, _ = pytorch_translate_generate.generate_score(
        models=models, args=args, task=task, dataset=task.dataset(args.gen_subset)
    )

    total_time = 0.0
    for _ in range(args.runs_per_length):
        # Only the timer matters here; scorer/num_sentences are unused.
        _, _, gen_timer, _ = pytorch_translate_generate.generate_score(
            models=models,
            args=args,
            task=task,
            dataset=task.dataset(args.gen_subset),
        )
        total_time += gen_timer.sum
        # Reset so the next run's timer starts from zero.
        gen_timer.reset()

    sentences_per_run = args.examples_per_length
    runs = args.runs_per_length
    total_sentences = sentences_per_run * runs
    total_tokens = total_sentences * n
    print(f"--- {n} tokens ---")
    print(f"Generated {total_tokens} tokens ({runs} runs of {sentences_per_run})")
    print(f"Total time: {total_time:.3f} seconds")
    time_per_sentence = total_time / total_sentences
    print(f"Time per sentence: {time_per_sentence:.3f} seconds\n")
def calculate_bleu_on_subset(args, dataset, epoch, offset, dataset_split):
    """Run generation on ``dataset_split`` of ``dataset`` and report BLEU.

    Prints a one-line summary (epoch, offset, beam size, scorer result,
    sentence/token counts and throughput) and returns the scorer's score.
    """
    scorer, num_sentences, gen_timer = generate.generate_score(
        args=args, dataset=dataset, dataset_split=dataset_split
    )
    summary = (
        f"| epoch {epoch:03d} | offset {offset} "
        f"| Eval on {dataset_split} subset "
        f"with beam={args.beam}: {scorer.result_string()}. "
        f"Generated {num_sentences} sentences ({gen_timer.n} tokens) "
        f"in {gen_timer.sum:.1f}s ({1. / gen_timer.avg:.2f} tokens/s)."
    )
    print(summary, flush=True)
    return scorer.score()
def calculate_bleu_on_subset(args, task, epoch_str: str, offset, dataset_split):
    """Evaluate BLEU over every dataset in ``dataset_split`` and aggregate.

    Loads the ensemble from ``args.path`` and, for multi-model tasks
    (semi-supervised / dual learning), evaluates each (lang_pair, dataset)
    in turn; otherwise evaluates the single dataset for the split.

    Returns:
        Tuple of the aggregated score (via ``task.score_aggregator`` when
        present, else ``sum``) and the translation samples from the last
        evaluated dataset (``None`` if no datasets were evaluated).
    """
    # This is a trick to have generate use max_sentences_valid
    max_sentences_train = args.max_sentences
    args.max_sentences = args.max_sentences_valid
    datasets = []
    lang_pairs = []
    # In a multi-model training setup, evaluate one model at a time with its
    # corresponding dataset; lang_pair identifies which model to use for
    # generation.
    if isinstance(task, (PytorchTranslateSemiSupervised, DualLearningTask)):
        for key, dataset in task.datasets[dataset_split].datasets.items():
            datasets.append(dataset)
            lang_pairs.append(key)
    else:
        datasets = [task.dataset(dataset_split)]
        lang_pairs = [None]
    score_aggregator_fn = (
        task.score_aggregator if hasattr(task, "score_aggregator") else sum
    )
    scores = []
    ensemble_models, _ = utils.load_ensemble_for_inference(args.path.split(":"), task)
    # Fix: translation_samples was previously unbound (NameError at return)
    # whenever the loop below ran zero iterations; default it explicitly.
    translation_samples = None
    for dataset, lang_pair in zip(datasets, lang_pairs):
        # Generate score
        scorer, num_sentences, gen_timer, translation_samples = generate.generate_score(
            args=args,
            task=task,
            dataset=dataset,
            models=ensemble_models,
            lang_pair=lang_pair,
        )
        scores.append(scorer.score())
        print(
            f"| epoch {epoch_str} | offset {offset} "
            f"| Eval on {dataset_split} {lang_pair if lang_pair else ''} subset "
            f"with beam={args.beam}: {scorer.result_string()}. "
            f"Generated {num_sentences} sentences ({gen_timer.n} tokens) "
            f"in {gen_timer.sum:.1f}s ({1. / gen_timer.avg:.2f} tokens/s).",
            flush=True,
        )
    # Set max_sentences to its original value
    args.max_sentences = max_sentences_train
    return score_aggregator_fn(scores), translation_samples
def calculate_bleu_on_subset(args, task, epoch_str: str, offset, dataset_split):
    """Generate translations for ``dataset_split`` and report BLEU.

    Temporarily substitutes ``args.max_sentences_valid`` for
    ``args.max_sentences`` so generation uses the validation batch size,
    then restores the original value before printing and returning.

    Returns:
        Tuple of (scorer score, translation samples).
    """
    # This is a trick to have generate use max_sentences_valid
    max_sentences_train = args.max_sentences
    args.max_sentences = args.max_sentences_valid
    # Generate score
    scorer, num_sentences, gen_timer, translation_samples = generate.generate_score(
        args=args, task=task, dataset_split=dataset_split
    )
    # Set max_sentences to its original value
    args.max_sentences = max_sentences_train
    summary = (
        f"| epoch {epoch_str} | offset {offset} "
        f"| Eval on {dataset_split} subset "
        f"with beam={args.beam}: {scorer.result_string()}. "
        f"Generated {num_sentences} sentences ({gen_timer.n} tokens) "
        f"in {gen_timer.sum:.1f}s ({1. / gen_timer.avg:.2f} tokens/s)."
    )
    print(summary, flush=True)
    return scorer.score(), translation_samples
def calculate_bleu_on_subset(
    args,
    task,
    epoch_str: str,
    offset,
    dataset_split,
    trainer,
    model_params: OrderedDict,
):
    """Evaluate BLEU on ``dataset_split`` using a fresh model built from weights.

    This function constructs a new model object based on the weights to
    prevent users from accidentally passing in the model from the trainer,
    since after calling make_generation_fast_(), the model would no longer
    be suitable for continuing training.

    Returns:
        Tuple of the aggregated score (via ``task.score_aggregator`` when
        present, else ``sum``) and the translation samples from the last
        evaluated dataset (``None`` if no datasets were evaluated).
    """
    if args.log_verbose:
        print(
            f"| Preparing to create/load model params for BLEU score "
            f"calculation (epoch {epoch_str}, offset {offset})."
        )
    model = task.build_model(args)
    model.load_state_dict(model_params)
    if args.log_verbose:
        print(
            f"| Finished creating/loading model params for BLEU score "
            f"calculation (epoch {epoch_str}, offset {offset})."
        )
    # This is a trick to have generate use max_sentences_valid
    max_sentences_train = args.max_sentences
    args.max_sentences = args.max_sentences_valid
    datasets = []
    lang_pairs = []
    # In a multi-model training setup, evaluate one model at a time with its
    # corresponding dataset; lang_pair identifies which model to use for
    # generation.
    if isinstance(task, (PyTorchTranslateMultiTask, DualLearningTask)):
        for key, dataset in task.datasets[dataset_split].datasets.items():
            datasets.append(dataset)
            lang_pairs.append(key)
    else:
        datasets = [task.dataset(dataset_split)]
        lang_pairs = [None]
    score_aggregator_fn = (
        task.score_aggregator if hasattr(task, "score_aggregator") else sum
    )
    scores = []
    # Fix: translation_samples was previously unbound (NameError at return)
    # whenever the loop below ran zero iterations; default it explicitly.
    translation_samples = None
    for dataset, lang_pair in zip(datasets, lang_pairs):
        # Generate score
        scorer, num_sentences, gen_timer, translation_samples = generate.generate_score(
            args=args,
            task=task,
            dataset=dataset,
            models=[model],
            lang_pair=lang_pair,
            modify_target_dict=False,
        )
        scores.append(scorer.score())
        print(
            f"| epoch {epoch_str} | offset {offset} "
            f"| Eval on {dataset_split} {lang_pair if lang_pair else ''} subset "
            f"with beam={args.beam}: {scorer.result_string()}. "
            f"Generated {num_sentences} sentences ({gen_timer.n} tokens) "
            f"in {gen_timer.sum:.1f}s ({1. / gen_timer.avg:.2f} tokens/s).",
            flush=True,
        )
        # If this is a distillation setup, also report the teacher model's
        # BLEU on the same dataset for comparison.
        if hasattr(model, "get_teacher_model"):
            scorer, num_sentences, gen_timer, _ = generate.generate_score(
                args=args,
                task=task,
                dataset=dataset,
                models=[model.get_teacher_model()],
                lang_pair=lang_pair,
            )
            print(
                f"TEACHER MODEL: | epoch {epoch_str} | offset {offset} "
                f"| Eval on {dataset_split} {lang_pair if lang_pair else ''} subset "
                f"with beam={args.beam}: {scorer.result_string()}. "
                f"Generated {num_sentences} sentences ({gen_timer.n} tokens) "
                f"in {gen_timer.sum:.1f}s ({1. / gen_timer.avg:.2f} tokens/s).",
                flush=True,
            )
    # Set max_sentences to its original value
    args.max_sentences = max_sentences_train
    return score_aggregator_fn(scores), translation_samples