Example #1
 def test_fmeasure_error(self):
     # evaluate_one_sent should raise when hyp and ref disagree on the
     # underlying character sequence ("aaab" vs. "aaa" here).
     with self.assertRaises(Exception) as context:
         metrics.SegmentationFMeasureEvaluator().evaluate_one_sent(
             "aaa b".split(), "aaa".split())
Example #2
# Function head reconstructed from context; the names read_data and loc_ are assumptions.
def read_data(loc_, post_process=None):
    data = []
    with open(loc_) as f:
        for line in f:
            t = line.strip()
            if post_process is not None:
                t = post_process(t)
            data.append(t)
    return data


eval_shortcuts = {
    "bleu": lambda: metrics.BLEUEvaluator(),
    "gleu": lambda: metrics.GLEUEvaluator(),
    "wer": lambda: metrics.WEREvaluator(),
    "cer": lambda: metrics.CEREvaluator(),
    "recall": lambda: metrics.RecallEvaluator(),
    "accuracy": lambda: metrics.SequenceAccuracyEvaluator(),
    "fmeasure": lambda: metrics.FMeasureEvaluator(),
    "seg_fmeasure": lambda: metrics.SegmentationFMeasureEvaluator(),
    "rnng_parse_fmeasure": lambda: metrics.RNNGParseFMeasureEvaluator(),
}
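

# Illustrative sketch, not from the original module: each shortcut maps to a
# zero-argument factory, so calling the stored lambda builds a fresh evaluator.
# The helper name resolve_evaluator is hypothetical.
def resolve_evaluator(spec):
    """Accept an Evaluator instance or a shortcut string such as "bleu"."""
    if isinstance(spec, str):
        return eval_shortcuts[spec]()  # invoke the factory lambda
    return spec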


def xnmt_evaluate(ref_file: Union[str, Sequence[str]],
                  hyp_file: Union[str, Sequence[str]],
                  evaluators: Sequence[metrics.Evaluator],
                  desc: Any = None) -> Sequence[metrics.EvalScore]:
    """"Returns the eval score (e.g. BLEU) of the hyp sents using reference trg sents

  Args:
    ref_file: path of the reference file
    hyp_file: path of the hypothesis trg file
    evaluators: Evaluation metrics. Can be a list of evaluator objects, or a shortcut string
    desc: descriptive string passed on to evaluators
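
A minimal usage sketch under the signature above; the file names and the choice of evaluators are placeholders, not part of the original module:

# Hypothetical call: "ref.txt" and "hyp.txt" are placeholder paths.
scores = xnmt_evaluate(ref_file="ref.txt",
                       hyp_file="hyp.txt",
                       evaluators=[eval_shortcuts["bleu"](),
                                   eval_shortcuts["wer"]()])
for score in scores:  # one EvalScore per evaluator, per the return annotation
    print(score)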
Example #3
 def test_fmeasure(self):
     # "ab c def" and "a bc def" spell the same characters "abcdef" but
     # segment them differently; the expected F-measure is 0.80.
     self.assertEqual(
         metrics.SegmentationFMeasureEvaluator().evaluate_one_sent(
             "ab c def".split(), "a bc def".split()).value(), 0.80)