Example #1

# NB: scorers, reporters, and sign_utils are modules from the compare-mt
# toolkit (https://github.com/neulab/compare-mt).
from compare_mt import reporters, scorers, sign_utils


def generate_score_report(ref,
                          outs,
                          score_type='bleu',
                          bootstrap=0,
                          prob_thresh=0.05,
                          meteor_directory=None,
                          options=None,
                          title=None,
                          case_insensitive=False):
    """
  Generate a report comparing overall scores of system(s) in both plain text and graphs.

  Args:
    ref: Tokens from the reference
    outs: Tokens from the output file(s)
    score_type: A string specifying the scoring type (bleu/length)
    bootstrap: Number of samples for significance test (0 to disable)
    prob_thresh: P-value threshold for significance test
    meteor_directory: Path to the directory of the METEOR code
    options: Options when using external program
    compare_directions: A string specifying which systems to compare 
    title: A string specifying the caption of the printed table
    case_insensitive: A boolean specifying whether to turn on the case insensitive option
  """
    # Check and normalize parameters; they may arrive as strings when the
    # function is invoked from the command line.
    bootstrap = int(bootstrap)
    prob_thresh = float(prob_thresh)
    if isinstance(case_insensitive, str):
        case_insensitive = case_insensitive == 'True'

    # Build a scorer for the requested metric, then score each system's
    # output against the reference at the corpus level.
    scorer = scorers.create_scorer_from_profile(
        score_type,
        case_insensitive=case_insensitive,
        meteor_directory=meteor_directory,
        options=options)

    scores, strs = zip(*[scorer.score_corpus(ref, out) for out in outs])

    if bootstrap != 0:
        # Run a paired bootstrap significance test over every pair of
        # systems (i, j).
        direcs = []
        for i in range(len(scores)):
            for j in range(i + 1, len(scores)):
                direcs.append((i, j))
        wins, sys_stats = sign_utils.eval_with_paired_bootstrap(
            ref, outs, scorer, direcs, num_samples=bootstrap)
        wins = list(zip(direcs, wins))
    else:
        wins = sys_stats = None

    # Generate the report in plain text and as a PDF figure under outputs/.
    reporter = reporters.ScoreReport(scorer=scorer,
                                     scores=scores,
                                     strs=strs,
                                     wins=wins,
                                     sys_stats=sys_stats,
                                     prob_thresh=prob_thresh,
                                     title=title)
    reporter.generate_report(output_fig_file=f'score-{score_type}-{bootstrap}',
                             output_fig_format='pdf',
                             output_directory='outputs')
    return reporter
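
A minimal usage sketch for this version, assuming tokenized reference and
system-output files (the file names here are hypothetical; load_tokens is
compare-mt's tokenized-corpus loader):

from compare_mt import corpus_utils

# Load the reference and two hypothetical system outputs as token lists.
ref = corpus_utils.load_tokens('data/ref.txt')
outs = [corpus_utils.load_tokens('data/sys1.txt'),
        corpus_utils.load_tokens('data/sys2.txt')]

# Score with BLEU and run a 1000-sample paired bootstrap significance test;
# the table is printed and the figure is written under outputs/.
generate_score_report(ref, outs, score_type='bleu', bootstrap=1000)
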
Example #2

# NB: this variant adds source-side and caching support; cache_utils is
# also a compare-mt module.
from compare_mt import cache_utils, reporters, scorers, sign_utils


def generate_score_report(ref,
                          outs,
                          src=None,
                          score_type='bleu',
                          bootstrap=0,
                          prob_thresh=0.05,
                          meteor_directory=None,
                          options=None,
                          title=None,
                          case_insensitive=False,
                          to_cache=False,
                          cache_dicts=None):
    """
  Generate a report comparing overall scores of system(s) in both plain text and graphs.

  Args:
    ref: Tokens from the reference
    outs: Tokens from the output file(s)
    src: Tokens for the source 
    score_type: A string specifying the scoring type (bleu/length)
    bootstrap: Number of samples for significance test (0 to disable)
    prob_thresh: P-value threshold for significance test
    meteor_directory: Path to the directory of the METEOR code
    options: Options when using external program
    compare_directions: A string specifying which systems to compare 
    title: A string specifying the caption of the printed table
    case_insensitive: A boolean specifying whether to turn on the case insensitive option
    to_cache: Return a list of computed statistics if True
    cache_dicts: A list of dictionaries that store cached statistics for each output
  """
    # Check and normalize parameters; they may arrive as strings when the
    # function is invoked from the command line.
    bootstrap = int(bootstrap)
    prob_thresh = float(prob_thresh)
    if isinstance(case_insensitive, str):
        case_insensitive = case_insensitive == 'True'

    # compute statistics
    scorer = scorers.create_scorer_from_profile(
        score_type,
        case_insensitive=case_insensitive,
        meteor_directory=meteor_directory,
        options=options)

    # Reuse cached scores and statistics when available; otherwise score
    # each system's output from scratch.
    cache_key_list = ['scores', 'strs', 'sign_stats']
    scores, strs, sign_stats = cache_utils.extract_cache_dicts(
        cache_dicts, cache_key_list, len(outs))
    if cache_dicts is None:
        scores, strs = zip(
            *[scorer.score_corpus(ref, out, src=src) for out in outs])

    if to_cache:
        # Short-circuit: return the computed statistics for later reuse
        # instead of generating a report.
        cache_dict = cache_utils.return_cache_dict(
            cache_key_list,
            [scores, strs, [scorer.cache_stats(ref, outs[0], src=src)]])
        return cache_dict

    if bootstrap != 0:
        # Run a paired bootstrap significance test over every pair of
        # systems (i, j), reusing cached statistics when available.
        direcs = []
        for i in range(len(scores)):
            for j in range(i + 1, len(scores)):
                direcs.append((i, j))
        wins, sys_stats = sign_utils.eval_with_paired_bootstrap(
            ref,
            outs,
            src,
            scorer,
            direcs,
            num_samples=bootstrap,
            cache_stats=sign_stats)
        wins = list(zip(direcs, wins))
    else:
        wins = sys_stats = None

    # generate reports
    reporter = reporters.ScoreReport(scorer=scorer,
                                     scores=scores,
                                     strs=strs,
                                     wins=wins,
                                     sys_stats=sys_stats,
                                     prob_thresh=prob_thresh,
                                     title=title)
    reporter.generate_report(output_fig_file=f'score-{score_type}-{bootstrap}',
                             output_fig_format='pdf',
                             output_directory='outputs')
    return reporter
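
This variant can cache the expensive per-system statistics and reuse them
across reports. A sketch of that workflow, under the same hypothetical file
names as above; per the cache_dicts description, the first pass builds one
cache dictionary per output:

from compare_mt import corpus_utils

ref = corpus_utils.load_tokens('data/ref.txt')
outs = [corpus_utils.load_tokens('data/sys1.txt'),
        corpus_utils.load_tokens('data/sys2.txt')]

# First pass: compute and cache statistics once per system output.
cache_dicts = [generate_score_report(ref, [out], to_cache=True)
               for out in outs]

# Later passes reuse the cached statistics instead of rescoring, which
# pays off when several reports are generated over the same outputs.
generate_score_report(ref, outs, bootstrap=1000, cache_dicts=cache_dicts)
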