def generate_score_report(ref, outs, score_type='bleu', bootstrap=0,
                          prob_thresh=0.05, meteor_directory=None,
                          options=None, title=None, case_insensitive=False):
    """Generate a report comparing overall scores of system(s) in both plain text and graphs.

    Args:
      ref: Tokens from the reference
      outs: Tokens from the output file(s)
      score_type: A string specifying the scoring type (bleu/length)
      bootstrap: Number of samples for significance test (0 to disable)
      prob_thresh: P-value threshold for significance test
      meteor_directory: Path to the directory of the METEOR code
      options: Options when using external program
      title: A string specifying the caption of the printed table
      case_insensitive: A boolean (or the string 'True'/'False') specifying
        whether to turn on the case insensitive option

    Returns:
      The populated ScoreReport; the report is also written under the
      'outputs' directory as a side effect.
    """
    # Callers (e.g. a CLI) may pass these as strings; normalize once up front.
    bootstrap = int(bootstrap)
    prob_thresh = float(prob_thresh)
    # BUG FIX: only coerce when a *string* was passed. The previous code
    # compared against the literal 'True', so an actual boolean True compared
    # unequal and was silently turned into False, disabling the option.
    if isinstance(case_insensitive, str):
        case_insensitive = (case_insensitive == 'True')

    scorer = scorers.create_scorer_from_profile(
        score_type,
        case_insensitive=case_insensitive,
        meteor_directory=meteor_directory,
        options=options)

    scores, strs = zip(*[scorer.score_corpus(ref, out) for out in outs])

    if bootstrap != 0:
        # Run the paired bootstrap over every ordered pair (i, j), i < j.
        direcs = [(i, j)
                  for i in range(len(scores))
                  for j in range(i + 1, len(scores))]
        wins, sys_stats = sign_utils.eval_with_paired_bootstrap(
            ref, outs, scorer, direcs, num_samples=bootstrap)
        wins = list(zip(direcs, wins))
    else:
        wins = sys_stats = None

    # Build and emit the report (plain text + PDF figure).
    reporter = reporters.ScoreReport(
        scorer=scorer, scores=scores, strs=strs, wins=wins,
        sys_stats=sys_stats, prob_thresh=prob_thresh, title=title)
    reporter.generate_report(output_fig_file=f'score-{score_type}-{bootstrap}',
                             output_fig_format='pdf',
                             output_directory='outputs')
    return reporter
def generate_score_report(ref, outs, src=None, score_type='bleu', bootstrap=0,
                          prob_thresh=0.05, meteor_directory=None,
                          options=None, title=None, case_insensitive=False,
                          to_cache=False, cache_dicts=None):
    """Generate a report comparing overall scores of system(s) in both plain text and graphs.

    Args:
      ref: Tokens from the reference
      outs: Tokens from the output file(s)
      src: Tokens for the source
      score_type: A string specifying the scoring type (bleu/length)
      bootstrap: Number of samples for significance test (0 to disable)
      prob_thresh: P-value threshold for significance test
      meteor_directory: Path to the directory of the METEOR code
      options: Options when using external program
      title: A string specifying the caption of the printed table
      case_insensitive: A boolean (or the string 'True'/'False') specifying
        whether to turn on the case insensitive option
      to_cache: Return a list of computed statistics if True
      cache_dicts: A list of dictionaries that store cached statistics for
        each output

    Returns:
      A cache dictionary when to_cache is True; otherwise the populated
      ScoreReport (also written under the 'outputs' directory).
    """
    # check and set parameters; CLI callers may pass these as strings
    bootstrap = int(bootstrap)
    prob_thresh = float(prob_thresh)
    # Use isinstance for the type check (idiomatic, subclass-safe); a genuine
    # boolean passes through untouched.
    if isinstance(case_insensitive, str):
        case_insensitive = (case_insensitive == 'True')

    # compute statistics
    scorer = scorers.create_scorer_from_profile(
        score_type,
        case_insensitive=case_insensitive,
        meteor_directory=meteor_directory,
        options=options)

    cache_key_list = ['scores', 'strs', 'sign_stats']
    # When cache_dicts is provided this recovers the precomputed values;
    # otherwise scores/strs are recomputed below (sign_stats stays unset/None).
    scores, strs, sign_stats = cache_utils.extract_cache_dicts(
        cache_dicts, cache_key_list, len(outs))
    if cache_dicts is None:
        scores, strs = zip(
            *[scorer.score_corpus(ref, out, src=src) for out in outs])

    if to_cache:
        # Only the first output's stats are cached here; the caller invokes
        # this once per output when building the full cache.
        cache_dict = cache_utils.return_cache_dict(
            cache_key_list,
            [scores, strs, [scorer.cache_stats(ref, outs[0], src=src)]])
        return cache_dict

    if bootstrap != 0:
        # Run the paired bootstrap over every ordered pair (i, j), i < j.
        direcs = [(i, j)
                  for i in range(len(scores))
                  for j in range(i + 1, len(scores))]
        wins, sys_stats = sign_utils.eval_with_paired_bootstrap(
            ref, outs, src, scorer, direcs,
            num_samples=bootstrap, cache_stats=sign_stats)
        wins = list(zip(direcs, wins))
    else:
        wins = sys_stats = None

    # generate reports (plain text + PDF figure)
    reporter = reporters.ScoreReport(
        scorer=scorer, scores=scores, strs=strs, wins=wins,
        sys_stats=sys_stats, prob_thresh=prob_thresh, title=title)
    reporter.generate_report(output_fig_file=f'score-{score_type}-{bootstrap}',
                             output_fig_format='pdf',
                             output_directory='outputs')
    return reporter