def get_baseline_rows(orig_sents_path, refs_sents_paths, language):
    """Score the Identity, Truncate and Reference baselines.

    Every baseline is evaluated with ``evaluate_system_output`` on the
    ``'custom'`` test set using the metrics ``sari``, ``bleu``, ``fkgl`` and
    ``sari_by_operation``, with quality estimation disabled.

    Args:
        orig_sents_path: Path of the original (source) sentences file.
        refs_sents_paths: Iterable of reference file paths; it is copied
            into a list before use.
        language: Stored in every row and forwarded to ``truncate``.

    Returns:
        A list of rows, each the result of ``add_dicts(row, scores)``:
        one ``'Identity'`` row (the source file scored as system output),
        one ``'Truncate'`` row (the file returned by
        ``apply_line_function_to_file`` with ``truncate(...,
        truncate_prop=0.2)``), and, only when there is more than one
        reference, one ``'Reference'`` row per reference with
        ``job_id`` set to ``ref_<index>``.
    """
    refs_sents_paths = list(refs_sents_paths)

    def score(sys_sents_path, references):
        # All baselines share exactly the same evaluation settings.
        return evaluate_system_output(
            'custom',
            sys_sents_path=sys_sents_path,
            orig_sents_path=orig_sents_path,
            refs_sents_paths=references,
            metrics=['sari', 'bleu', 'fkgl', 'sari_by_operation'],
            quality_estimation=False,
        )

    # Identity baseline: the source sentences are used as the system output.
    identity_scores = score(orig_sents_path, refs_sents_paths)
    rows = [add_dicts({'exp_name': 'Identity', 'language': language}, identity_scores)]

    # Truncate baseline: system output is the transformed copy of the source.
    truncated_path = apply_line_function_to_file(
        lambda sentence: truncate(sentence, truncate_prop=0.2, language=language),
        orig_sents_path,
    )
    truncate_scores = score(truncated_path, refs_sents_paths)
    rows.append(add_dicts({'exp_name': 'Truncate', 'language': language}, truncate_scores))

    # Reference baseline is only computed when several references exist.
    if len(refs_sents_paths) <= 1:
        return rows

    for idx, ref_path in enumerate(refs_sents_paths):
        # Score reference `idx` against all the others, with the previous
        # reference (the last one when idx == 0) listed a second time up front.
        # NOTE(review): presumably this keeps the number of references equal
        # to the other baselines -- confirm.
        other_refs = [
            refs_sents_paths[idx - 1],
            *refs_sents_paths[:idx],
            *refs_sents_paths[idx + 1:],
        ]
        reference_scores = score(ref_path, other_refs)
        reference_row = {
            'exp_name': 'Reference',
            'language': language,
            'job_id': f'ref_{idx}',
        }
        rows.append(add_dicts(reference_row, reference_scores))
    return rows
def evaluate_simplifier_on_turkcorpus(simplifier, phase):
    """Evaluate ``simplifier`` on the TurkCorpus ``'test'`` or ``'valid'`` split.

    The prediction file returned by ``get_prediction_on_turkcorpus`` is
    passed through ``lowercase_file`` and then ``to_lrb_rrb_file`` before
    being scored with the ``bleu``, ``sari`` and ``fkgl`` metrics and
    quality estimation enabled.

    Args:
        simplifier: Forwarded to ``get_prediction_on_turkcorpus``.
        phase: ``'test'`` (scored on ``'turk'``) or ``'valid'`` (scored on
            ``'turk_valid'``).

    Returns:
        The result of ``evaluate_system_output``, or ``None`` when ``phase``
        is neither ``'test'`` nor ``'valid'`` (the predictions are still
        generated in that case).

    NOTE(review): a later definition with the same name in the same module
    would replace this one at import time -- confirm which is intended.
    """
    predictions_path = to_lrb_rrb_file(
        lowercase_file(get_prediction_on_turkcorpus(simplifier, phase))
    )

    if phase == 'test':
        test_set = 'turk'
    elif phase == 'valid':
        test_set = 'turk_valid'
    else:
        # Unsupported phase: nothing is evaluated.
        return None

    return evaluate_system_output(
        test_set,
        sys_sents_path=predictions_path,
        metrics='bleu,sari,fkgl',
        quality_estimation=True,
    )
def evaluate_simplifier_on_turkcorpus(simplifier, phase):
    """Evaluate ``simplifier`` on the legacy TurkCorpus test set for ``phase``.

    The prediction file returned by ``get_prediction_on_turkcorpus`` is
    passed through ``lowercase_file`` and then ``to_lrb_rrb_file``; the
    result is scored on the ``turkcorpus_<phase>_legacy`` test set with the
    ``bleu``, ``sari_legacy`` and ``fkgl`` metrics and quality estimation
    enabled.

    Args:
        simplifier: Forwarded to ``get_prediction_on_turkcorpus``.
        phase: Split name, interpolated into the test-set identifier.

    Returns:
        Whatever ``evaluate_system_output`` returns.

    NOTE(review): if an earlier definition with the same name exists in the
    same module, this one replaces it at import time -- confirm intended.
    """
    raw_predictions = get_prediction_on_turkcorpus(simplifier, phase)
    lowercased_predictions = lowercase_file(raw_predictions)
    normalized_predictions = to_lrb_rrb_file(lowercased_predictions)

    test_set = f'turkcorpus_{phase}_legacy'
    return evaluate_system_output(
        test_set,
        sys_sents_path=normalized_predictions,
        metrics=['bleu', 'sari_legacy', 'fkgl'],
        quality_estimation=True,
    )
def get_scores_on_dataset(pred_path, dataset, phase):
    """Score a prediction file against the references of a dataset split.

    The source file is ``get_data_filepath(dataset, phase, 'complex')`` and
    the references are every file in ``get_dataset_dir(dataset)`` matching
    ``<phase>.simple*``, in the order the glob yields them.

    Args:
        pred_path: Path of the system output to evaluate.
        dataset: Dataset identifier forwarded to the path helpers.
        phase: Split name used to locate the source and reference files.

    Returns:
        The result of ``evaluate_system_output`` on the ``'custom'`` test
        set with the ``sari``, ``bleu``, ``fkgl`` and ``sari_by_operation``
        metrics and quality estimation disabled.
    """
    complex_path = get_data_filepath(dataset, phase, 'complex')
    dataset_dir = get_dataset_dir(dataset)
    reference_paths = [*dataset_dir.glob(f'{phase}.simple*')]

    return evaluate_system_output(
        'custom',
        sys_sents_path=pred_path,
        orig_sents_path=complex_path,
        refs_sents_paths=reference_paths,
        metrics=['sari', 'bleu', 'fkgl', 'sari_by_operation'],
        quality_estimation=False,
    )
def evaluate_simplifier(simplifier, test_set, orig_sents_path=None, refs_sents_paths=None, quality_estimation=False):
    """Run ``simplifier`` on a test set's source sentences and score the output.

    The source sentences returned by ``get_orig_and_refs_sents`` are written
    to a fresh temporary file, which is what ``simplifier`` receives and what
    is passed on as ``orig_sents_path`` for evaluation.

    Args:
        simplifier: Callable taking the path of a source file and returning
            the path of the simplified output file.
        test_set: Test-set identifier forwarded to
            ``get_orig_and_refs_sents`` and ``evaluate_system_output``.
        orig_sents_path: Optional source path, used only to load the source
            sentences.
        refs_sents_paths: Optional reference paths, forwarded unchanged.
        quality_estimation: Forwarded to ``evaluate_system_output``.

    Returns:
        The result of ``evaluate_system_output`` with the ``sari``, ``bleu``
        and ``fkgl`` metrics.
    """
    # Only the source sentences are needed here; the loaded references are
    # discarded and the reference paths are forwarded as given.
    source_sentences = get_orig_and_refs_sents(
        test_set,
        orig_sents_path=orig_sents_path,
        refs_sents_paths=refs_sents_paths,
    )[0]

    # Materialize the sources in a temp file so the simplifier can read them.
    temp_source_path = get_temp_filepath()
    write_lines(source_sentences, temp_source_path)
    simplified_path = simplifier(temp_source_path)

    return evaluate_system_output(
        test_set,
        sys_sents_path=simplified_path,
        orig_sents_path=temp_source_path,
        refs_sents_paths=refs_sents_paths,
        metrics=['sari', 'bleu', 'fkgl'],
        quality_estimation=quality_estimation,
    )