def fairseq_train_and_evaluate(dataset, metrics_coefs=None, parametrization_budget=64, **kwargs):
    """Train a fairseq model on ``dataset`` and return its combined validation score.

    The function validates the dataset, optionally builds a preprocessed copy
    of it, runs fairseq preprocessing and training inside a fresh experiment
    directory, searches for the best preprocessor parametrization, and finally
    evaluates the resulting simplifier with ``phase='valid'``.

    Args:
        dataset: Dataset identifier accepted by ``check_dataset``.
        metrics_coefs: Coefficients forwarded to ``find_best_parametrization``
            and ``combine_metrics`` together with the BLEU, SARI and FKGL
            scores (presumably one weight per metric, in that order -- TODO
            confirm against ``combine_metrics``). ``None`` (the default) means
            ``[1, 1, 1]``.
        parametrization_budget: Budget forwarded to
            ``find_best_parametrization``.
        **kwargs: Extra options. They are first passed through
            ``check_and_resolve_args``; ``'preprocessors_kwargs'`` is read
            directly, and the rest are filtered with ``get_allowed_kwargs``
            before being handed to ``fairseq_train`` / the simplifier.

    Returns:
        The value returned by ``combine_metrics`` for the validation scores.
    """
    # A list literal as a default would be created once and shared between
    # calls; build a fresh one per call instead.
    if metrics_coefs is None:
        metrics_coefs = [1, 1, 1]

    check_dataset(dataset)
    kwargs = check_and_resolve_args(kwargs)
    exp_dir = prepare_exp_dir()

    preprocessors_kwargs = kwargs.get('preprocessors_kwargs', {})
    preprocessors = get_preprocessors(preprocessors_kwargs)
    if len(preprocessors) > 0:
        # Train on the preprocessed variant and keep the pickled preprocessors
        # next to the experiment outputs.
        dataset = create_preprocessed_dataset(dataset, preprocessors, n_jobs=1)
        shutil.copy(get_dataset_dir(dataset) / 'preprocessors.pickle', exp_dir)

    preprocessed_dir = fairseq_preprocess(dataset)
    train_kwargs = get_allowed_kwargs(fairseq_train, preprocessed_dir, exp_dir, **kwargs)
    fairseq_train(preprocessed_dir, exp_dir=exp_dir, **train_kwargs)

    # Evaluation
    generate_kwargs = get_allowed_kwargs(fairseq_generate, 'complex_filepath', 'pred_filepath', exp_dir, **kwargs)
    recommended_preprocessors_kwargs = find_best_parametrization(
        exp_dir, metrics_coefs, preprocessors_kwargs, parametrization_budget)
    print(f'recommended_preprocessors_kwargs={recommended_preprocessors_kwargs}')
    simplifier = get_simplifier(exp_dir, recommended_preprocessors_kwargs, generate_kwargs)
    scores = evaluate_simplifier_on_turkcorpus(simplifier, phase='valid')
    print(f'scores={scores}')
    score = combine_metrics(scores['BLEU'], scores['SARI'], scores['FKGL'], metrics_coefs)
    return score
def evaluate_parametrization(**instru_kwargs):
    """Return the combined score of one candidate parametrization.

    ``instru_kwargs`` are converted to preprocessors kwargs, a simplifier is
    built from them, and it is evaluated with ``phase='valid'``. ``exp_dir``
    and ``metrics_coefs`` are not parameters: they are looked up in the
    surrounding scope.
    """
    # Note: the default generate kwargs are used on purpose instead of the
    # provided ones, because they are faster.
    candidate_kwargs = instru_kwargs_to_preprocessors_kwargs(instru_kwargs)
    candidate_simplifier = get_simplifier(
        exp_dir,
        preprocessors_kwargs=candidate_kwargs,
        generate_kwargs={},
    )
    valid_scores = evaluate_simplifier_on_turkcorpus(candidate_simplifier, phase='valid')
    return combine_metrics(
        valid_scores['BLEU'],
        valid_scores['SARI'],
        valid_scores['FKGL'],
        metrics_coefs,
    )
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#

from access.evaluation.general import evaluate_simplifier_on_turkcorpus
from access.preprocessors import get_preprocessors
from access.resources.prepare import prepare_turkcorpus, prepare_models
from access.simplifiers import get_fairseq_simplifier, get_preprocessed_simplifier


def main():
    """Evaluate the pretrained model with ``phase='test'`` and print the scores."""
    print('Evaluating pretrained model')
    prepare_turkcorpus()
    best_model_dir = prepare_models()
    # Hard-coded preprocessor settings used with the pretrained model
    # (presumably the values recommended by the parametrization search --
    # TODO confirm where they come from).
    recommended_preprocessors_kwargs = {
        'LengthRatioPreprocessor': {'target_ratio': 0.95},
        'LevenshteinPreprocessor': {'target_ratio': 0.75},
        'WordRankRatioPreprocessor': {'target_ratio': 0.75},
        'SentencePiecePreprocessor': {'vocab_size': 10000},
    }
    preprocessors = get_preprocessors(recommended_preprocessors_kwargs)
    simplifier = get_fairseq_simplifier(best_model_dir, beam=8)
    simplifier = get_preprocessed_simplifier(simplifier, preprocessors=preprocessors)
    # The evaluation result used to be discarded; report it so that running
    # the script actually shows the scores.
    scores = evaluate_simplifier_on_turkcorpus(simplifier, phase='test')
    print(scores)


if __name__ == '__main__':
    main()