Beispiel #1
0
def generate_sentence_examples(ref,
                               outs,
                               src=None,
                               score_type='sentbleu',
                               report_length=10,
                               compare_directions='0-1',
                               title=None,
                               case_insensitive=False):
    """
    Generate examples of sentences that satisfy some criterion, usually score of one system better

    Args:
      ref: Tokens from the reference
      outs: Tokens from the output file(s)
      src: Tokens from the source (optional)
      score_type: The type of scorer to use
      report_length: Number of sentences to print for each system being better or worse
      compare_directions: A string specifying which systems to compare
      title: A string specifying the caption of the printed table
      case_insensitive: A boolean (or the string 'True'/'False' from the command
                        line) specifying whether to turn on the case insensitive option

    Returns:
      The SentenceExampleReport after its report has been generated.
    """
    report_length = int(report_length)
    # Coerce only when a string was passed (e.g. from the command line).
    # Comparing a real boolean against 'True' would always yield False and
    # silently discard a case_insensitive=True argument.
    if isinstance(case_insensitive, str):
        case_insensitive = case_insensitive == 'True'

    scorer = scorers.create_scorer_from_profile(
        score_type, case_insensitive=case_insensitive)

    direcs = arg_utils.parse_compare_directions(compare_directions)

    scorediff_lists = []
    for (left, right) in direcs:
        scorediff_list = []
        deduplicate_set = set()
        for i, (o1, o2, r) in enumerate(zip(outs[left], outs[right], ref)):
            # Skip (output1, output2, reference) triples already scored once.
            key = (tuple(o1), tuple(o2), tuple(r))
            if key in deduplicate_set:
                continue
            deduplicate_set.add(key)
            s1, str1 = scorer.score_sentence(r, o1)
            s2, str2 = scorer.score_sentence(r, o2)
            scorediff_list.append((s2 - s1, s1, s2, str1, str2, i))
        # Sort by score difference so the most extreme examples sit at the ends.
        scorediff_list.sort()
        scorediff_lists.append(scorediff_list)

    reporter = reporters.SentenceExampleReport(report_length=report_length,
                                               scorediff_lists=scorediff_lists,
                                               scorer=scorer,
                                               ref=ref,
                                               outs=outs,
                                               src=src,
                                               compare_directions=direcs,
                                               title=title)
    reporter.generate_report()
    return reporter
Beispiel #2
0
def generate_ngram_report(ref, outs,
                          min_ngram_length=1, max_ngram_length=4,
                          report_length=50, alpha=1.0, compare_type='match',
                          ref_labels=None, out_labels=None,
                          compare_directions='0-1',
                          case_insensitive=False):
  """
  Generate a report comparing aggregate n-gram statistics in both plain text and graphs

  Args:
    ref: Tokens from the reference
    outs: Tokens from the output file(s)
    min_ngram_length: minimum n-gram length
    max_ngram_length: maximum n-gram length
    report_length: the number of n-grams to report
    alpha: when sorting n-grams for salient features, the smoothing coefficient. A higher smoothing coefficient
           will result in more frequent phenomena (sometimes this is good).
    compare_type: what type of statistic to compare
                  (match: n-grams that match the reference, over: over-produced ngrams, under: under-produced ngrams)
    ref_labels: either a filename of a file full of reference labels, or a list of strings corresponding to `ref`.
                If specified, will aggregate statistics over labels instead of n-grams.
    out_labels: output labels. must be specified if ref_labels is specified.
    compare_directions: A string specifying which systems to compare
    case_insensitive: A boolean (or the string 'True'/'False') specifying whether
                      to turn on the case insensitive option

  Returns:
    The NgramReport after its report has been generated.
  """
  min_ngram_length, max_ngram_length, report_length = int(min_ngram_length), int(max_ngram_length), int(report_length)
  alpha = float(alpha)
  # Coerce only when a string was passed; comparing a real boolean against
  # 'True' would always come out False and drop case_insensitive=True.
  if isinstance(case_insensitive, str):
    case_insensitive = case_insensitive == 'True'

  if out_labels is not None:
    out_labels = arg_utils.parse_files(out_labels)
    if len(out_labels) != len(outs):
      raise ValueError('The number of output files should be equal to the number of output labels.')

  if isinstance(ref_labels, str):
    # The docstring requires out_labels whenever ref_labels is given; without
    # this check the loop below would crash with an opaque TypeError on None.
    if out_labels is None:
      raise ValueError('out_labels must be specified when ref_labels is specified.')
    label_files_str = f'    ref_labels={ref_labels},'
    for i, out_label in enumerate(out_labels):
      label_files_str += f' out{i}_labels={out_label},'
    label_files = label_files_str
  else:
    label_files = None

  # Lowercasing is skipped when aggregating over label files, since labels are
  # compared instead of the raw tokens.
  if not isinstance(ref_labels, str) and case_insensitive:
    ref = corpus_utils.lower(ref)
    outs = [corpus_utils.lower(out) for out in outs]

  ref_labels = corpus_utils.load_tokens(ref_labels) if isinstance(ref_labels, str) else ref_labels
  out_labels = [corpus_utils.load_tokens(out_labels[i]) if out_labels is not None else None
                for i in range(len(outs))]
  totals, matches, overs, unders = zip(
      *[ngram_utils.compare_ngrams(ref, out, ref_labels=ref_labels, out_labels=out_label,
                                   min_length=min_ngram_length, max_length=max_ngram_length)
        for out, out_label in zip(outs, out_labels)])
  direcs = arg_utils.parse_compare_directions(compare_directions)

  # Map the requested statistic to the corresponding per-system count dicts.
  stats_by_type = {'match': matches, 'over': overs, 'under': unders}
  scores = []
  for (left, right) in direcs:
    if compare_type not in stats_by_type:
      raise ValueError(f'Illegal compare_type "{compare_type}"')
    stat = stats_by_type[compare_type]
    scores.append(stat_utils.extract_salient_features(stat[left], stat[right], alpha=alpha))
  scorelist = [sorted(score.items(), key=operator.itemgetter(1), reverse=True) for score in scores]

  reporter = reporters.NgramReport(scorelist=scorelist, report_length=report_length,
                                   min_ngram_length=min_ngram_length,
                                   max_ngram_length=max_ngram_length,
                                   matches=matches,
                                   compare_type=compare_type, alpha=alpha,
                                   compare_directions=direcs,
                                   label_files=label_files)
  reporter.generate_report(output_fig_file=f'ngram-min{min_ngram_length}-max{max_ngram_length}-{compare_type}',
                           output_fig_format='pdf',
                           output_directory='outputs')
  return reporter
Beispiel #3
0
def generate_sentence_examples(ref,
                               outs,
                               src=None,
                               score_type='sentbleu',
                               report_length=10,
                               compare_directions='0-1',
                               title=None,
                               case_insensitive=False,
                               to_cache=False,
                               cache_dicts=None):
    """
    Generate examples of sentences that satisfy some criterion, usually score of one system better

    Args:
      ref: Tokens from the reference
      outs: Tokens from the output file(s)
      src: Tokens from the source (optional)
      score_type: The type of scorer to use
      report_length: Number of sentences to print for each system being better or worse
      compare_directions: A string specifying which systems to compare
      title: A string specifying the caption of the printed table
      case_insensitive: A boolean specifying whether to turn on the case insensitive option
      to_cache: Return a list of computed statistics if True
      cache_dicts: A list of dictionaries that store cached statistics for each output

    Returns:
      A cache dict of per-sentence scores/strings when to_cache is True,
      otherwise the SentenceExampleReport after its report has been generated.
    """
    # check and set parameters
    report_length = int(report_length)
    # isinstance (not type ==) per PEP 8; a direct boolean comparison against
    # 'True' is redundant once we know the value is a string.
    if isinstance(case_insensitive, str):
        case_insensitive = case_insensitive == 'True'

    # compute statistics
    scorer = scorers.create_scorer_from_profile(
        score_type, case_insensitive=case_insensitive)

    cache_key_list = ['scores', 'strs']
    scores, strs = cache_utils.extract_cache_dicts(cache_dicts, cache_key_list,
                                                   len(outs))
    # Pad src with None so the zip below works when no source was provided.
    src = [None for _ in ref] if src is None else src
    if cache_dicts is None:
        # No cache available: score every (reference, output) sentence pair.
        scores, strs = [], []
        for out in outs:
            scores_i, strs_i = [], []
            for (r, o, s) in zip(ref, out, src):
                score, string = scorer.score_sentence(r, o, s)
                scores_i.append(score)
                strs_i.append(string)
            scores.append(scores_i)
            strs.append(strs_i)

    if to_cache:
        cache_dict = cache_utils.return_cache_dict(cache_key_list,
                                                   [scores, strs])
        return cache_dict

    direcs = arg_utils.parse_compare_directions(compare_directions)

    scorediff_lists = []
    for (left, right) in direcs:
        scorediff_list = []
        deduplicate_set = set()
        for i, (o1, o2, r) in enumerate(zip(outs[left], outs[right], ref)):
            # Skip (output1, output2, reference) triples already processed.
            key = (tuple(o1), tuple(o2), tuple(r))
            if key in deduplicate_set:
                continue
            deduplicate_set.add(key)
            s1, str1 = scores[left][i], strs[left][i]
            s2, str2 = scores[right][i], strs[right][i]
            scorediff_list.append((s2 - s1, s1, s2, str1, str2, i))
        # Sort by score difference so the most extreme examples sit at the ends.
        scorediff_list.sort()
        scorediff_lists.append(scorediff_list)

    # generate reports
    reporter = reporters.SentenceExampleReport(report_length=report_length,
                                               scorediff_lists=scorediff_lists,
                                               scorer=scorer,
                                               ref=ref,
                                               outs=outs,
                                               src=src,
                                               compare_directions=direcs,
                                               title=title)
    reporter.generate_report()
    return reporter