def testComputeNotEqual(self):
   """Fully disjoint corpora still score above zero (smoothing)."""
   hypothesis = [[1, 2, 3, 4]]
   reference = [[5, 6, 7, 8]]
   score = bleu_hook.compute_bleu(reference, hypothesis)
   # Smoothing keeps the score strictly positive even for tiny corpora.
   self.assertAllClose(score, 0.0798679, atol=1e-03)
def testBLEU():
    """Smoke-check: print the BLEU of one hypothesis/reference pair."""
    hypothesis = [['how', 'do', 'i', 'learn', 'linux']]
    reference = [['how', 'do', 'i', 'learn', 'networking', 'with', 'linux']]
    score = bleu_hook.compute_bleu(reference, hypothesis)
    print(score)
 def testComputeNotEqual(self):
     """Disjoint corpora yield a small nonzero BLEU due to smoothing."""
     hyp = [[1, 2, 3, 4]]
     ref = [[5, 6, 7, 8]]
     # The smoothing prevents an exact zero for small corpora.
     bleu_value = bleu_hook.compute_bleu(ref, hyp)
     self.assertAllClose(bleu_value, 0.0798679, atol=1e-03)
# Example #4
# 0
def t2t_bleu(targets, predictions):
  """Computes corpus BLEU (scaled to 0-100) with the t2t tokenizer.

  Both sides are run through bleu_hook.bleu_tokenize before being handed
  to bleu_hook.compute_bleu.

  Args:
    targets: a list of strings, the targets from the validation set.
    predictions: a list of strings, the model predictions.

  Returns:
    A dictionary: {"t2t_bleu": bleu_value}.
  """
  tokenized_targets = list(map(bleu_hook.bleu_tokenize, targets))
  tokenized_predictions = list(map(bleu_hook.bleu_tokenize, predictions))
  score = bleu_hook.compute_bleu(tokenized_targets, tokenized_predictions)
  return {"t2t_bleu": 100 * score}
# Example #5
# 0
    def eval_metrics(self):
        """Builds the dict of metric functions used during evaluation.

        Returns:
            A dict mapping metric names to metric callables. `accuracy`,
            `grammar_accuracy`, `make_pyfunc_metric_fn`, `compute_bleu`,
            `compute_f1_score` and `adjust_predictions_labels` are defined
            elsewhere in this file/module.
        """
        def get_tokens(program):
            # Decode column 0 of `program` into vocabulary strings.
            # NOTE(review): assumes `program` is a 2-D int array whose first
            # column holds token ids — confirm against callers.
            return [self.tokens[x] for x in program[:, 0].flatten().tolist()]

        def get_functions(program):
            # Keep only ids whose token starts with '@' — treated here as
            # function tokens.
            return [x for x in program[:, 0] if self.tokens[x].startswith('@')]

        def get_devices(program):
            # For each '@' token, keep everything before the last '.' —
            # presumably the '<device>' part of '<device>.<function>'.
            return [
                self.tokens[x].rsplit('.', 1)[0] for x in program[:, 0]
                if self.tokens[x].startswith('@')
            ]

        def accuracy_without_parameters(predictions, labels, features):
            # Compare only channel 0 (the token ids), ignoring parameter
            # channels; `features` is unused but part of the metric-fn
            # signature expected by the caller.
            batch_size, predictions, labels = adjust_predictions_labels(
                predictions, labels, num_elements_per_time=3)
            weights = tf.ones((batch_size, ), dtype=tf.float32)
            # A sample counts as correct only if every time step matches.
            ok = tf.to_float(
                tf.reduce_all(tf.equal(predictions[:, :, 0], labels[:, :, 0]),
                              axis=1))
            return ok, weights

        return {
            "accuracy":
            accuracy,
            "grammar_accuracy":
            grammar_accuracy,
            "device_accuracy":
            make_pyfunc_metric_fn(
                lambda pred, label: get_devices(pred) == get_devices(label)),
            "function_accuracy":
            make_pyfunc_metric_fn(lambda pred, label: get_functions(pred) ==
                                  get_functions(label)),
            "accuracy_without_parameters":
            accuracy_without_parameters,
            "bleu_score":
            make_pyfunc_metric_fn(lambda pred, label: compute_bleu(
                [get_tokens(pred)], [get_tokens(label)])),
            "num_function_accuracy":
            make_pyfunc_metric_fn(lambda pred, label: len(get_functions(pred))
                                  == len(get_functions(label))),
            "token_f1_accuracy":
            make_pyfunc_metric_fn(lambda pred, label: compute_f1_score(
                get_tokens(pred), get_tokens(label)))
        }
 def testComputeMultipleNgrams(self):
   """Partially overlapping two-sentence batch gives intermediate BLEU."""
   refs = [[1, 2, 1, 13], [12, 6, 7, 4, 8, 9, 10]]
   hyps = [[1, 2, 1, 3], [5, 6, 7, 4]]
   score = bleu_hook.compute_bleu(refs, hyps)
   self.assertAllClose(score, 0.3436, atol=1e-03)
 def testComputeMultipleBatch(self):
   """Two-sentence batch with a single mismatched token."""
   hyps = [[1, 2, 3, 4], [5, 6, 7, 0]]
   refs = [[1, 2, 3, 4], [5, 6, 7, 10]]
   expected = 0.7231
   self.assertAllClose(bleu_hook.compute_bleu(refs, hyps), expected, atol=1e-03)
 def testComputeBleuEqual(self):
   """Identical corpora score a perfect 1.0."""
   hyp = [[1, 2, 3]]
   ref = [[1, 2, 3]]
   self.assertEqual(bleu_hook.compute_bleu(ref, hyp), 1.0)
 def testComputeMultipleNgrams(self):
     """Checks BLEU on a batch where only some n-grams overlap."""
     refs = [[1, 2, 1, 13], [12, 6, 7, 4, 8, 9, 10]]
     hyps = [[1, 2, 1, 3], [5, 6, 7, 4]]
     expected = 0.3436
     self.assertAllClose(bleu_hook.compute_bleu(refs, hyps), expected, atol=1e-03)
 def testComputeMultipleBatch(self):
     """Checks BLEU on a batch where one token differs."""
     hyps = [[1, 2, 3, 4], [5, 6, 7, 0]]
     refs = [[1, 2, 3, 4], [5, 6, 7, 10]]
     score = bleu_hook.compute_bleu(refs, hyps)
     self.assertAllClose(score, 0.7231, atol=1e-03)
 def testComputeBleuEqual(self):
     """A hypothesis identical to its reference scores exactly 1.0."""
     hyp = [[1, 2, 3]]
     ref = [[1, 2, 3]]
     score = bleu_hook.compute_bleu(ref, hyp)
     self.assertEqual(score, 1.0)
 def testComputeNotEqual(self):
     """Fully disjoint corpora score exactly 0.0 (no smoothing here)."""
     hyp = [[1, 2, 3, 4]]
     ref = [[5, 6, 7, 8]]
     score = bleu_hook.compute_bleu(ref, hyp)
     self.assertEqual(score, 0.0)
 def testComputeNotEqual(self):
   """Disjoint corpora produce a BLEU of exactly zero."""
   hyp = [[1, 2, 3, 4]]
   ref = [[5, 6, 7, 8]]
   self.assertEqual(bleu_hook.compute_bleu(ref, hyp), 0.0)
# Example #14
# 0
def evaluate(reference_corpus, translation_corpus):
    """Thin wrapper around bleu_hook.compute_bleu for the given corpora."""
    return bleu_hook.compute_bleu(reference_corpus, translation_corpus)