Code Example #1
 def _calc_src_buckets_and_matches(self, src_sent, src_label, ref_sent, ref_aligns, out_sents):
   # Initial setup for special cases
   if self.case_insensitive:
     src_sent = [corpus_utils.lower(w) for w in src_sent]
     ref_sent = [corpus_utils.lower(w) for w in ref_sent]
     out_sents = [[corpus_utils.lower(w) for w in out_sent] for out_sent in out_sents]
   if not src_label:
     src_label = []
   # Get matches
   _, ref_matches = self._calc_trg_matches(ref_sent, out_sents)
   # Process the source, getting the bucket
   src_buckets = [self.calc_bucket(w, label=l) for (w,l) in itertools.zip_longest(src_sent, src_label)]
   # For each source word, find the reference words that need to be correct
   src_aligns = [[] for _ in src_sent]
   for src, trg in ref_aligns:
     src_aligns[src].append(trg)
   # Calculate totals for each sentence
   num_buckets = len(self.bucket_strs)
   num_outs = len(out_sents)
   my_ref_total = np.zeros(num_buckets, dtype=int)
   my_out_matches = np.zeros((num_outs, num_buckets), dtype=int)
   for src_bucket in src_buckets:
     my_ref_total[src_bucket] += 1
   my_out_totals = np.broadcast_to(np.reshape(my_ref_total, (1, num_buckets)), (num_outs, num_buckets))
   for oai, (out_sent, ref_match) in enumerate(zip(out_sents, ref_matches)):
     for src_bucket, src_align in zip(src_buckets, src_aligns):
       if len(src_align) != 0:
         if all([ref_match[x] >= 0 for x in src_align]):
           my_out_matches[oai,src_bucket] += 1
   return my_ref_total, my_out_totals, my_out_matches, src_buckets, src_aligns, ref_matches
Code Example #2
    def _edit_distance(self, ref, out):
        if self.case_insensitive:
            ref = corpus_utils.lower(ref)
            out = corpus_utils.lower(out)

        sp1 = len(ref) + 1
        tp1 = len(out) + 1
        scores = np.zeros((sp1, tp1))
        equals = (np.expand_dims(np.array(ref), axis=1) == np.array(out))
        scores[:, 0] = range(sp1)
        scores[0, :] = range(tp1)

        # Forward edit distance
        for i in range(0, len(ref)):
            for j in range(0, len(out)):
                my_action = 0 if equals[i, j] else 1
                my_score = scores[i, j] + my_action * self.sub_pen
                del_score = scores[i, j + 1] + self.del_pen
                if del_score < my_score:
                    my_score = del_score
                ins_score = scores[i + 1, j] + self.ins_pen
                if ins_score < my_score:
                    my_score = ins_score
                scores[i + 1, j + 1] = my_score

        return scores[-1, -1]
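
The snippet above computes a token-level edit distance with configurable substitution, insertion, and deletion penalties. As a sanity check, here is a minimal self-contained sketch of the same dynamic program with all penalties fixed to 1 (plain Levenshtein distance over tokens); `levenshtein` is an illustrative name, not part of compare-mt.

import numpy as np

def levenshtein(ref, out):
    # scores[i, j] = cost of editing the first i reference tokens
    # into the first j output tokens
    scores = np.zeros((len(ref) + 1, len(out) + 1))
    scores[:, 0] = range(len(ref) + 1)   # delete everything
    scores[0, :] = range(len(out) + 1)   # insert everything
    for i in range(len(ref)):
        for j in range(len(out)):
            sub = scores[i, j] + (0 if ref[i] == out[j] else 1)
            delete = scores[i, j + 1] + 1
            insert = scores[i + 1, j] + 1
            scores[i + 1, j + 1] = min(sub, delete, insert)
    return scores[-1, -1]

print(levenshtein("the cat sat".split(), "the cats sat".split()))  # 1.0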
Code Example #3
 def _calc_trg_buckets_and_matches(self, ref_sent, ref_label, out_sents, out_labels):
   # Initial setup for special cases
   if self.case_insensitive:
     ref_sent = [corpus_utils.lower(w) for w in ref_sent]
     out_sents = [[corpus_utils.lower(w) for w in out_sent] for out_sent in out_sents]
   if not ref_label:
     ref_label = []
     out_labels = [[] for _ in out_sents]
   # Get matches
   out_matches, _ = self._calc_trg_matches(ref_sent, out_sents)
   # Process the reference, getting the bucket
   ref_buckets = [self.calc_bucket(w, label=l) for (w,l) in itertools.zip_longest(ref_sent, ref_label)]
   # Process each of the outputs, finding matches
   out_buckets = [[] for _ in out_sents]
   for oai, (out_sent, out_label, match, out_buck) in \
           enumerate(itertools.zip_longest(out_sents, out_labels, out_matches, out_buckets)):
     for oi, (w, l, m) in enumerate(itertools.zip_longest(out_sent, out_label, match)):
       out_buck.append(self.calc_bucket(w, label=l) if m < 0 else ref_buckets[m])
   # Calculate totals for each sentence
   num_buckets = len(self.bucket_strs)
   num_outs = len(out_sents)
   my_ref_total = np.zeros(num_buckets, dtype=int)
   my_out_totals = np.zeros((num_outs, num_buckets), dtype=int)
   my_out_matches = np.zeros((num_outs, num_buckets), dtype=int)
   for b in ref_buckets:
     my_ref_total[b] += 1
   for oi, (obs, ms) in enumerate(zip(out_buckets, out_matches)):
     for b, m in zip(obs, ms):
       my_out_totals[oi,b] += 1
       if m >= 0:
         my_out_matches[oi,b] += 1
   return my_ref_total, my_out_totals, my_out_matches, ref_buckets, out_buckets, out_matches
Code Example #4
    def cache_stats(self, ref, out, src=None):
        """
     Cache sufficient statistics for calculating BLEU score

    Args:
      ref: A reference corpus
      out: An output corpus
      src: A source corpus. Ignored if passed

    Returns:
      A list of cached statistics
    """
        if self.case_insensitive:
            ref = corpus_utils.lower(ref)
            out = corpus_utils.lower(out)

        cached_stats = []

        for r, o in zip(ref, out):
            prec = []
            for n in range(1, len(self.weights) + 1):
                prec.append(self._precision(r, o, n))
            cached_stats.append((len(r), len(o), prec))

        return cached_stats
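
Each cached entry is a per-sentence tuple of (reference length, output length, per-n precision statistics). The value returned by self._precision is not shown in this snippet, so the (clipped matches, total n-grams) pair below is only an assumption used for illustration; ngram_stats is a hypothetical helper, not compare-mt code.

from collections import Counter

def ngram_stats(ref, out, n):
    # Clipped n-gram matches and total output n-grams: the usual
    # sufficient statistics behind a BLEU n-gram precision.
    ref_ngrams = Counter(tuple(ref[i:i + n]) for i in range(len(ref) - n + 1))
    out_ngrams = Counter(tuple(out[i:i + n]) for i in range(len(out) - n + 1))
    match = sum(min(c, ref_ngrams[g]) for g, c in out_ngrams.items())
    total = max(sum(out_ngrams.values()), 1)
    return match, total

ref = "the cat sat on the mat".split()
out = "the cat the cat on the mat".split()
print(ngram_stats(ref, out, 1))  # (5, 7): extra 'the' and 'cat' are clipped to reference counts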
Code Example #5
    def score_sentence(self, ref, out):
        if self.case_insensitive:
            ref = corpus_utils.lower(ref)
            out = corpus_utils.lower(out)

        if self._stemmer:
            ref = [self._stemmer.stem(x) if len(x) > 3 else x for x in ref]
            out = [self._stemmer.stem(x) if len(x) > 3 else x for x in out]

        if self.rouge_type == 'rougeL':
            scores = rouge_scorer._score_lcs(ref, out)
        elif re.match(r"rouge[0-9]$", self.rouge_type):
            n = int(self.rouge_type[5:])
            if n <= 0:
                raise ValueError(
                    f"rougen requires positive n: {self.rouge_type}")
            ref_ngrams = rouge_scorer._create_ngrams(ref, n)
            out_ngrams = rouge_scorer._create_ngrams(out, n)
            scores = rouge_scorer._score_ngrams(ref_ngrams, out_ngrams)
        else:
            raise ValueError(f"Invalid rouge type: {self.rouge_type}")

        if self.score_type == 'fmeasure':
            return scores.fmeasure, None
        elif self.score_type == 'precision':
            return scores.precision, None
        elif self.score_type == 'recall':
            return scores.recall, None
        else:
            raise ValueError(f"Invalid score type: {self.score_type}")
Code Example #6
    def cache_stats(self, ref, out):
        """
     Cache sufficient statistics for calculating BLEU score

    Args:
      ref: A reference corpus
      out: An output corpus

    Returns:
      A tuple of cached statistics
    """
        if self.case_insensitive:
            ref = corpus_utils.lower(ref)
            out = corpus_utils.lower(out)

        cached_ref_len = []
        cached_out_len = []
        cached_prec = []

        for r, o in zip(ref, out):
            cached_ref_len.append(len(r))
            cached_out_len.append(len(o))
            prec = []
            for n in range(1, len(self.weights) + 1):
                prec.append(self._precision(r, o, n))
            cached_prec.append(prec)

        return (cached_ref_len, cached_out_len, cached_prec)
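
For context, corpus BLEU is conventionally recovered from cached statistics like these as a brevity penalty times the weighted geometric mean of the n-gram precisions. The sketch below assumes each per-sentence precision entry is a (clipped matches, total n-grams) pair; that shape is an assumption about _precision, not something shown in the snippet above.

import math

def corpus_bleu_from_cache(cached_ref_len, cached_out_len, cached_prec, weights):
    ref_len, out_len = sum(cached_ref_len), sum(cached_out_len)
    # brevity penalty
    bp = 1.0 if out_len > ref_len else math.exp(1 - ref_len / max(out_len, 1))
    log_avg = 0.0
    for n, w in enumerate(weights):
        match = sum(sent[n][0] for sent in cached_prec)
        total = sum(sent[n][1] for sent in cached_prec)
        log_avg += w * math.log(max(match, 1e-16) / max(total, 1))
    return bp * math.exp(log_avg)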
Code Example #7
File: bucketers.py  Project: shankar0206/compare-mt
    def __init__(self,
                 freq_counts=None,
                 freq_count_file=None,
                 freq_corpus_file=None,
                 freq_data=None,
                 bucket_cutoffs=None,
                 case_insensitive=False):
        """
    A bucketer that buckets words by their frequency.

    Args:
      freq_counts: A dictionary containing word/count data.
      freq_count_file: A file containing counts for each word in tab-separated word, count format.
                       Ignored if freq_counts exists.
      freq_corpus_file: A file with a corpus used for collecting counts. Ignored if freq_count_file exists.
      freq_data: A tokenized corpus from which counts can be calculated. Ignored if freq_corpus_file exists.
      bucket_cutoffs: Cutoffs for each bucket.
                      The first bucket will be range(0,bucket_cutoffs[0]).
                      Middle buckets will be range(bucket_cutoffs[i-1],bucket_cutoffs[i]).
                      Final bucket will be everything greater than bucket_cutoffs[-1].
      case_insensitive: A boolean specifying whether to turn on the case insensitive option.
    """
        self.case_insensitive = case_insensitive
        if not freq_counts:
            freq_counts = defaultdict(lambda: 0)
            if freq_count_file is not None:
                print(f'Reading frequency from "{freq_count_file}"')
                with open(freq_count_file, "r") as f:
                    for line in f:
                        word, freq = line.strip().split('\t')
                        if self.case_insensitive:
                            freq_counts[corpus_utils.lower(word)] += int(freq)
                        else:
                            freq_counts[word] = int(freq)
            elif freq_corpus_file:
                print(f'Reading frequency from "{freq_corpus_file}"')
                for words in corpus_utils.iterate_tokens(freq_corpus_file):
                    for word in words:
                        if self.case_insensitive:
                            freq_counts[corpus_utils.lower(word)] += 1
                        else:
                            freq_counts[word] += 1
            elif freq_data:
                print('Reading frequency from the reference')
                for words in freq_data:
                    for word in words:
                        if self.case_insensitive:
                            freq_counts[corpus_utils.lower(word)] += 1
                        else:
                            freq_counts[word] += 1
            else:
                raise ValueError(
                    'Must have at least one source of frequency counts for FreqWordBucketer'
                )
        self.freq_counts = freq_counts

        if bucket_cutoffs is None:
            bucket_cutoffs = [1, 2, 3, 4, 5, 10, 100, 1000]
        self.set_bucket_cutoffs(bucket_cutoffs)
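
The default cutoffs give frequency buckets [0,1), [1,2), ..., [100,1000) and [1000, inf). cutoff_into_bucket itself is not shown above, so the mapping below (via the standard bisect module) is only a sketch of the behaviour the docstring describes, not the project's implementation.

import bisect

bucket_cutoffs = [1, 2, 3, 4, 5, 10, 100, 1000]

def cutoff_into_bucket_sketch(count):
    # the number of cutoffs <= count gives the bucket index
    return bisect.bisect_right(bucket_cutoffs, count)

print(cutoff_into_bucket_sketch(0))     # 0  (unseen words)
print(cutoff_into_bucket_sketch(7))     # 5  (5 <= count < 10)
print(cutoff_into_bucket_sketch(5000))  # 8  (count >= 1000)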
Code Example #8
 def calc_bucket(self, val, ref=None, src=None, label=None):
     if self.case_insensitive:
         return self.cutoff_into_bucket(
             self.scorer.score_sentence(corpus_utils.lower(ref),
                                        corpus_utils.lower(val))[0])
     else:
         return self.cutoff_into_bucket(
             self.scorer.score_sentence(ref, val, src)[0])
Code Example #9
  def calc_source_bucketed_matches(self, src, ref, out, ref_aligns, out_aligns, src_labels=None):
    """
    Calculate the number of matches, bucketed by the type of word we have
    This must be used with a subclass that has self.bucket_strs defined, and self.calc_bucket(word) implemented.

    Args:
      src: The source corpus
      ref: The reference corpus
      out: The output corpus
      ref_aligns: Alignments of the reference corpus
      out_aligns: Alignments of the output corpus
      src_labels: Labels of the source corpus (optional)

    Returns:
      A tuple containing:
        both_tot: the frequency of a particular bucket appearing in both output and reference
        ref_tot: the frequency of a particular bucket appearing in just reference
        out_tot: the frequency of a particular bucket appearing in just output
        rec: recall of the bucket
        prec: precision of the bucket
        fmeas: f1-measure of the bucket
    """
    if not hasattr(self, 'case_insensitive'):
      self.case_insensitive = False

    src_labels = src_labels if src_labels else []
    matches = [[0, 0, 0] for x in self.bucket_strs]
    for src_sent, ref_sent, out_sent, ref_align, out_align, src_lab in itertools.zip_longest(src, ref, out, ref_aligns, out_aligns, src_labels):
      ref_cnt = defaultdict(lambda: 0)
      for i, word in enumerate(ref_sent):
        if self.case_insensitive:
          word = corpus_utils.lower(word)
        ref_cnt[word] += 1
      for i, (src_index, trg_index) in enumerate(out_align):
        src_word = src_sent[src_index]
        word = out_sent[trg_index]
        if self.case_insensitive:
          word = corpus_utils.lower(word)
        bucket = self.calc_bucket(src_word,
                                  label=src_lab[src_index] if src_lab else None)
        if ref_cnt[word] > 0:
          ref_cnt[word] -= 1
          matches[bucket][0] += 1
        matches[bucket][2] += 1
      for i, (src_index, trg_index) in enumerate(ref_align):
        src_word = src_sent[src_index]
        bucket = self.calc_bucket(src_word,
                                  label=src_lab[src_index] if src_lab else None)
        matches[bucket][1] += 1

    for both_tot, ref_tot, out_tot in matches:
      if both_tot == 0:
        rec, prec, fmeas = 0.0, 0.0, 0.0
      else:
        rec = both_tot / float(ref_tot)
        prec = both_tot / float(out_tot)
        fmeas = 2 * prec * rec / (prec + rec)
      yield both_tot, ref_tot, out_tot, rec, prec, fmeas
Code Example #10
  def score_corpus(self, ref, out):

    if self.case_insensitive:
      ref = corpus_utils.lower(ref)
      out = corpus_utils.lower(out)

    bleu_object = sacrebleu.corpus_bleu([" ".join(x) for x in out],
                                        [[" ".join(x) for x in ref]])

    return bleu_object.score, None
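
For reference, a minimal stand-alone call to the same sacrebleu API on detokenized strings (toy data, not part of compare-mt):

import sacrebleu

outs = ["the cat sat on the mat", "a dog barked"]
refs = ["the cat sat on the mat", "the dog barked"]
bleu = sacrebleu.corpus_bleu(outs, [refs])   # one inner list per reference set
print(bleu.score)                            # corpus BLEU on a 0-100 scale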
Code Example #11
File: align_utils.py  Project: zgd716/compare-mt
def ngram_context_align(ref, out, order=-1, case_insensitive=False):
    """
  Calculate the word alignment between a reference sentence and an output sentence. 
  Proposed in the following paper:

  Automatic Evaluation of Translation Quality for Distant Language Pairs
  Hideki Isozaki, Tsutomu Hirao, Kevin Duh, Katsuhito Sudoh, Hajime Tsukada
  http://www.anthology.aclweb.org/D/D10/D10-1092.pdf 

  Args:
    ref: A reference sentence
    out: An output sentence
    order: The highest order of grams we want to consider (-1=inf)
    case_insensitive: A boolean specifying whether to turn on the case insensitive option

  Returns:
    The word alignment, represented as a list of integers. 
  """

    if case_insensitive:
        ref = corpus_utils.lower(ref)
        out = corpus_utils.lower(out)

    order = len(ref) if order == -1 else order

    ref_gram_pos = _count_ngram(ref, order)
    out_gram_pos = _count_ngram(out, order)

    worder = []
    for i, word in enumerate(out):
        if len(ref_gram_pos[1][word]) == 0:
            continue
        if len(ref_gram_pos[1][word]) == len(out_gram_pos[1][word]) == 1:
            worder.append(ref_gram_pos[1][word][0])
        else:
            word_forward = word
            word_backward = word
            for j in range(1, order):
                if i - j >= 0:
                    word_backward = out[i - j] + ' ' + word_backward
                    if len(ref_gram_pos[j + 1][word_backward]) == len(
                            out_gram_pos[j + 1][word_backward]) == 1:
                        worder.append(ref_gram_pos[j + 1][word_backward][0] +
                                      j)
                        break

                if i + j < len(out):
                    word_forward = word_forward + ' ' + out[i + j]
                    if len(ref_gram_pos[j + 1][word_forward]) == len(
                            out_gram_pos[j + 1][word_forward]) == 1:
                        worder.append(ref_gram_pos[j + 1][word_forward][0])
                        break

    return worder
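
The core idea above is that an output word is aligned to a reference position when some surrounding n-gram identifies it uniquely in both sentences. The stripped-down sketch below handles only the unigram case (words occurring exactly once in both sentences) and omits the context-growing loop; unigram_align is an illustrative name, not part of compare-mt.

from collections import defaultdict

def unigram_align(ref, out):
    ref_pos, out_pos = defaultdict(list), defaultdict(list)
    for i, w in enumerate(ref):
        ref_pos[w].append(i)
    for i, w in enumerate(out):
        out_pos[w].append(i)
    worder = []
    for w in out:
        if len(ref_pos[w]) == 1 and len(out_pos[w]) == 1:
            worder.append(ref_pos[w][0])
    return worder

print(unigram_align("the cat sat".split(), "sat the cat".split()))  # [2, 0, 1]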
Code Example #12
File: scorers.py  Project: zgd716/compare-mt
  def score_corpus(self, ref, out):
    """
    Score a corpus using ChrF score

    Args:
      ref: A reference corpus
      out: An output corpus

    Returns:
      A tuple containing a single value for the ChrF score and a string summarizing auxiliary information
    """
    if self.case_insensitive:
      chrf = self.chrf_score([[corpus_utils.lower(x)] for x in ref], corpus_utils.lower(out))
    else:
      chrf = self.chrf_score([[x] for x in ref], out)
    return chrf, None
Code Example #13
File: scorers.py  Project: awesome-archive/compare-mt
  def score_sentence(self, ref, out):
    """
    Score a single sentence with sentence-level smoothed BLEU score

    Args:
      ref: A reference sentence
      out: An output sentence

    Returns:
      The sentence-level BLEU score, and None
    """
    chencherry = nltk.translate.bleu_score.SmoothingFunction()
    if self.case_insensitive:
      return nltk.translate.bleu_score.sentence_bleu([corpus_utils.lower(ref)], corpus_utils.lower(out), smoothing_function=chencherry.method2), None
    else:  
      return nltk.translate.bleu_score.sentence_bleu([ref], out, smoothing_function=chencherry.method2), None
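
A minimal stand-alone call to the same NLTK API on toy token lists (case-sensitive path; toy data, not part of compare-mt):

import nltk

chencherry = nltk.translate.bleu_score.SmoothingFunction()
ref = ['the', 'cat', 'sat', 'on', 'the', 'mat']
out = ['the', 'cat', 'sat', 'on', 'mat']
score = nltk.translate.bleu_score.sentence_bleu(
    [ref], out, smoothing_function=chencherry.method2)
print(score)  # smoothed sentence-level BLEU in [0, 1]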
Code Example #14
    def cache_stats(self, ref, out, src=None):
        """
     Cache sufficient statistics for calculating SacreBLEU score

    Args:
      ref: A reference corpus
      out: An output corpus
      src: A source corpus. Ignored if passed

    Returns:
      A list of cached statistics
    """
        if self.case_insensitive:
            ref = corpus_utils.lower(ref)
            out = corpus_utils.lower(out)
        ref = [' '.join(x) for x in ref]
        out = [' '.join(x) for x in out]

        return self.bleu._extract_corpus_statistics(out, [ref])
Code Example #15
File: bucketers.py  Project: shankar0206/compare-mt
 def calc_bucket(self,
                 word,
                 ref_label=None,
                 out_label=None,
                 src_label=None):
     if self.case_insensitive:
         return self.cutoff_into_bucket(
             self.freq_counts.get(corpus_utils.lower(word), 0))
     else:
         return self.cutoff_into_bucket(self.freq_counts.get(word, 0))
Code Example #16
    def cache_stats(self, ref, out):
        """
     Cache sufficient statistics for calculating scores

    Args:
      ref: A reference corpus
      out: An output corpus

    Returns:
      A tuple of cached statistics
    """
        if hasattr(self, 'case_insensitive') and self.case_insensitive:
            ref = corpus_utils.lower(ref)
            out = corpus_utils.lower(out)

        cached_scores = []
        for r, o in zip(ref, out):
            cached_scores.append(self.score_sentence(r, o)[0])

        return cached_scores
Code Example #17
File: scorers.py  Project: zgd716/compare-mt
  def cache_stats(self, ref, out):
    """
    Cache sufficient statistics for calculating SacreBLEU score

    Args:
      ref: A reference corpus
      out: An output corpus

    Returns:
      A list of cached statistics
    """
    if self.case_insensitive:
      ref = corpus_utils.lower(ref)
      out = corpus_utils.lower(out)

    cached_stats = []
    for r, o in zip(ref, out):
      re = sacrebleu.corpus_bleu(" ".join(o), " ".join(r))
      cached_stats.append( (re.counts, re.totals, re.sys_len, re.ref_len) )

    return cached_stats
Code Example #18
    def score_sentence(self, ref, out, src=None):
        if self.case_insensitive:
            ref = corpus_utils.lower(ref)
            out = corpus_utils.lower(out)

        if self._stemmer:
            ref = [self._stemmer.stem(x) if len(x) > 3 else x for x in ref]
            out = [self._stemmer.stem(x) if len(x) > 3 else x for x in out]

        if self.rouge_type == 'rougeL':
            ref, out = self.tokenize(" ".join(ref)), self.tokenize(
                " ".join(out))
            scores = rouge_scorer._score_lcs(ref, out)
        elif self.rouge_type == 'rougeLsum':
            refs = [self.tokenize(s) for s in self.get_sents(ref)]
            outs = [self.tokenize(s) for s in self.get_sents(out)]
            scores = rouge_scorer._summary_level_lcs(refs, outs)
        elif re.match(r"rouge[0-9]$", self.rouge_type):
            ref, out = self.tokenize(" ".join(ref)), self.tokenize(
                " ".join(out))
            n = int(self.rouge_type[5:])
            if n <= 0:
                raise ValueError(
                    f"rougen requires positive n: {self.rouge_type}")
            ref_ngrams = rouge_scorer._create_ngrams(ref, n)
            out_ngrams = rouge_scorer._create_ngrams(out, n)
            scores = rouge_scorer._score_ngrams(ref_ngrams, out_ngrams)
        else:
            raise ValueError(f"Invalid rouge type: {self.rouge_type}")

        if self.score_type == 'fmeasure':
            score_value = scores.fmeasure
        elif self.score_type == 'precision':
            score_value = scores.precision
        elif self.score_type == 'recall':
            score_value = scores.recall
        else:
            raise ValueError(f"Invalid score type: {self.score_type}")

        return self.scale * score_value, None
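
The private helpers called above (rouge_scorer._score_lcs, _create_ngrams, _score_ngrams) come from Google's rouge_score package. For comparison, its public API computes the same precision/recall/F-measure triples; the snippet below is plain rouge_score usage on toy strings, not compare-mt code.

from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
scores = scorer.score('the cat sat on the mat',    # target (reference)
                      'the cat was on the mat')    # prediction (output)
print(scores['rouge1'].fmeasure, scores['rougeL'].recall)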
Code Example #19
    def cache_stats(self, ref, out, src=None):
        """
    Cache sufficient statistics for calculating scores

    Args:
      ref: A reference corpus
      out: An output corpus
      src: A source corpus. Might be ignored or required 
        depending on the metric
    Returns:
      A tuple of cached statistics
    """
        if hasattr(self, 'case_insensitive') and self.case_insensitive:
            ref = corpus_utils.lower(ref)
            out = corpus_utils.lower(out)

        cached_scores = []
        src = [None for _ in ref] if src is None else src
        for r, o, s in zip(ref, out, src):
            cached_scores.append(self.score_sentence(r, o, s)[0])

        return cached_scores
Code Example #20
File: scorers.py  Project: awesome-archive/compare-mt
  def score_sentence(self, ref, out):
    """
    Score a single sentence with WER

    Args:
      ref: A reference sentence
      out: An output sentence

    Returns:
      The WER, and None
    """
    if self.case_insensitive:
      ref = corpus_utils.lower(ref)
      out = corpus_utils.lower(out)
  
    sp1 = len(ref)+1
    tp1 = len(out)+1
    scores = np.zeros((sp1, tp1))
    equals = (np.expand_dims(np.array(ref), axis=1) == np.array(out))
    scores[:,0] = range(sp1)
    scores[0,:] = range(tp1)

    # Forward edit distance
    for i in range(0, len(ref)):
      for j in range(0, len(out)):
        my_action = 0 if equals[i,j] else 1
        my_score = scores[i,j] + my_action * self.sub_pen
        del_score = scores[i,j+1] + self.del_pen 
        if del_score < my_score:
          my_score = del_score
        ins_score = scores[i+1,j] + self.ins_pen 
        if ins_score < my_score:
          my_score = ins_score
        scores[i+1,j+1] = my_score

    return scores[-1,-1], None
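
Note that score_sentence above returns the raw edit distance; the conventional word error rate divides it by the reference length. The usage sketch below assumes a hypothetical scorer instance with unit penalties and only illustrates that standard normalization, not how compare-mt itself aggregates WER.

ref = "the cat sat on the mat".split()
out = "the cat sat on mat".split()
dist, _ = scorer.score_sentence(ref, out)  # raw distance: 1.0 (one deletion)
wer = dist / len(ref)                      # 1 / 6, about 0.167 under the standard definition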
Code Example #21
    def calc_bucketed_likelihoods(self, corpus, likelihoods):
        """
    Calculate the average of log likelihoods, bucketed by the type of word/label we have
    This must be used with a subclass that has self.bucket_strs defined, and self.calc_bucket(word) implemented.

    Args:
      corpus: The text/label corpus over which we compute the likelihoods
      likelihoods: The log-likelihoods corresponding to each word/label in the corpus

    Returns:
      the average log-likelihood bucketed by the type of word/label we have
    """
        if not hasattr(self, 'case_insensitive'):
            self.case_insensitive = False

        if type(corpus) == str:
            corpus = corpus_utils.load_tokens(corpus)
        bucketed_likelihoods = [[0.0, 0] for _ in self.bucket_strs]
        if len(corpus) != len(likelihoods):
            raise ValueError(
                "Corpus and likelihoods should have the same size.")
        for sent, list_of_likelihoods in zip(corpus, likelihoods):
            if len(sent) != len(list_of_likelihoods):
                raise ValueError(
                    "Each sentence of the corpus should have likelihood value for each word"
                )

            for word, ll in zip(sent, list_of_likelihoods):
                if self.case_insensitive:
                    word = corpus_utils.lower(word)
                bucket = self.calc_bucket(word, label=word)
                bucketed_likelihoods[bucket][0] += ll
                bucketed_likelihoods[bucket][1] += 1

        for ll, count in bucketed_likelihoods:
            if count != 0:
                yield ll / float(count)
            else:
                yield "NA"  # not applicable
Code Example #22
def generate_ngram_report(ref, outs,
                       min_ngram_length=1, max_ngram_length=4,
                       report_length=50, alpha=1.0, compare_type='match',
                       ref_labels=None, out_labels=None,
                       compare_directions='0-1',
                       case_insensitive=False):
  """
  Generate a report comparing aggregate n-gram statistics in both plain text and graphs

  Args:
    ref: Tokens from the reference
    outs: Tokens from the output file(s)
    min_ngram_length: minimum n-gram length
    max_ngram_length: maximum n-gram length
    report_length: the number of n-grams to report
    alpha: when sorting n-grams for salient features, the smoothing coefficient. A higher smoothing coefficient
           will result in more frequent phenomena (sometimes this is good).
    compare_type: what type of statistic to compare
                  (match: n-grams that match the reference, over: over-produced ngrams, under: under-produced ngrams)
    ref_labels: either a filename of a file full of reference labels, or a list of strings corresponding to `ref`.
                If specified, will aggregate statistics over labels instead of n-grams.
    out_labels: output labels. must be specified if ref_labels is specified.
    compare_directions: A string specifying which systems to compare
    case_insensitive: A boolean specifying whether to turn on the case insensitive option
  """
  min_ngram_length, max_ngram_length, report_length = int(min_ngram_length), int(max_ngram_length), int(report_length)
  alpha = float(alpha)
  case_insensitive = True if case_insensitive == 'True' else False

  if out_labels is not None:
    out_labels = arg_utils.parse_files(out_labels)
    if len(out_labels) != len(outs):
      raise ValueError(f'The number of output files should be equal to the number of output labels.')

  if type(ref_labels) == str:
    label_files_str = f'    ref_labels={ref_labels},'
    for i, out_label in enumerate(out_labels):
      label_files_str += f' out{i}_labels={out_label},'
    label_files = (label_files_str)
  else:
    label_files = None

  if type(alpha) == str:
    alpha = float(alpha)

  if not type(ref_labels) == str and case_insensitive:
    ref = corpus_utils.lower(ref)
    outs = [corpus_utils.lower(out) for out in outs]

  ref_labels = corpus_utils.load_tokens(ref_labels) if type(ref_labels) == str else ref_labels
  out_labels = [corpus_utils.load_tokens(out_labels[i]) if out_labels is not None else None for i in range(len(outs))]
  totals, matches, overs, unders = zip(*[ngram_utils.compare_ngrams(ref, out, ref_labels=ref_labels, out_labels=out_label,
                                                             min_length=min_ngram_length, max_length=max_ngram_length) for out, out_label in zip(outs, out_labels)])
  direcs = arg_utils.parse_compare_directions(compare_directions)
  scores = []
  for (left, right) in direcs:
    if compare_type == 'match':
      scores.append(stat_utils.extract_salient_features(matches[left], matches[right], alpha=alpha))
    elif compare_type == 'over':
      scores.append(stat_utils.extract_salient_features(overs[left], overs[right], alpha=alpha))
    elif compare_type == 'under':
      scores.append(stat_utils.extract_salient_features(unders[left], unders[right], alpha=alpha))
    else:
      raise ValueError(f'Illegal compare_type "{compare_type}"')
  scorelist = [sorted(score.items(), key=operator.itemgetter(1), reverse=True) for score in scores]

  reporter = reporters.NgramReport(scorelist=scorelist, report_length=report_length,
                                   min_ngram_length=min_ngram_length, 
                                   max_ngram_length=max_ngram_length,
                                   matches=matches,
                                   compare_type=compare_type, alpha=alpha,
                                   compare_directions=direcs,
                                   label_files=label_files)                                   
  reporter.generate_report(output_fig_file=f'ngram-min{min_ngram_length}-max{max_ngram_length}-{compare_type}',
                           output_fig_format='pdf', 
                           output_directory='outputs')
  return reporter 
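
A hedged usage sketch of the report generator above; the file paths are hypothetical, and corpus_utils.load_tokens is the same loader the function already uses for label files.

ref = corpus_utils.load_tokens('example/ref.txt')
outs = [corpus_utils.load_tokens('example/sys1.txt'),
        corpus_utils.load_tokens('example/sys2.txt')]
reporter = generate_ngram_report(ref, outs,
                                 min_ngram_length=1, max_ngram_length=4,
                                 compare_type='match',
                                 compare_directions='0-1',
                                 case_insensitive='True')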
Code Example #23
 def calc_bucket(self, word, label=None):
     if self.case_insensitive:
         word = corpus_utils.lower(word)
     return self.cutoff_into_bucket(self.freq_counts.get(word, 0))
Code Example #24
File: bucketers.py  Project: shankar0206/compare-mt
    def calc_bucketed_matches(self,
                              ref,
                              out,
                              ref_labels=None,
                              out_labels=None):
        """
    Calculate the number of matches, bucketed by the type of word we have
    This must be used with a subclass that has self.bucket_strs defined, and self.calc_bucket(word) implemented.

    Args:
      ref: The reference corpus
      out: The output corpus
      ref_labels: Labels of the reference corpus (optional)
      out_labels: Labels of the output corpus (should be specified iff ref_labels is)

    Returns:
      A tuple containing:
        both_tot: the frequency of a particular bucket appearing in both output and reference
        ref_tot: the frequency of a particular bucket appearing in just reference
        out_tot: the frequency of a particular bucket appearing in just output
        rec: recall of the bucket
        prec: precision of the bucket
        fmeas: f1-measure of the bucket
    """
        if not hasattr(self, 'case_insensitive'):
            self.case_insensitive = False

        ref_labels = ref_labels if ref_labels else []
        out_labels = out_labels if out_labels else []
        matches = [[0, 0, 0] for x in self.bucket_strs]
        for ref_sent, out_sent, ref_lab, out_lab in itertools.zip_longest(
                ref, out, ref_labels, out_labels):
            ref_pos = defaultdict(lambda: [])
            for i, word in enumerate(ref_sent):
                if self.case_insensitive:
                    word = corpus_utils.lower(word)
                ref_pos[word].append(i)
            for i, word in enumerate(out_sent):
                if self.case_insensitive:
                    word = corpus_utils.lower(word)
                if len(ref_pos[word]) > 0:
                    ri = ref_pos[word][0]
                    ref_pos[word] = ref_pos[word][1:]
                    bucket = self.calc_bucket(
                        word,
                        ref_label=ref_lab[ri] if ref_lab else None,
                        out_label=out_lab[i] if out_lab else None)
                    matches[bucket][0] += 1
                    matches[bucket][1] += 1
                else:
                    bucket = self.calc_bucket(
                        word, out_label=out_lab[i] if out_lab else None)
                matches[bucket][2] += 1
            for word, my_pos in ref_pos.items():
                if len(my_pos) > 0:
                    for ri in my_pos:
                        bucket = self.calc_bucket(
                            ref_sent[ri],
                            ref_label=ref_lab[ri] if ref_lab else None)
                        matches[bucket][1] += 1
        for both_tot, ref_tot, out_tot in matches:
            if both_tot == 0:
                rec, prec, fmeas = 0.0, 0.0, 0.0
            else:
                rec = both_tot / float(ref_tot)
                prec = both_tot / float(out_tot)
                fmeas = 2 * prec * rec / (prec + rec)
            yield both_tot, ref_tot, out_tot, rec, prec, fmeas