def test_compute(self):
    """Checks CorpusBLEU.compute on all-correct, partial, wrong, and empty inputs.

    Bug fix: the original called `self.assertAlmostEqual(result, {...})` on two
    dicts. `assertAlmostEqual` only short-circuits when the operands compare
    exactly equal; for unequal dicts it falls through to `round(first - second,
    7)` and raises TypeError, so the float metric values were never compared
    with a tolerance. Compare key sets and then each value per-key instead.
    """
    corpusblue_metrics = metrics.CorpusBLEU()

    def _assert_dict_almost_equal(actual, expected):
        # Same keys, then per-value almost-equal so float scores get a tolerance.
        self.assertEqual(set(actual.keys()), set(expected.keys()))
        for key, value in expected.items():
            self.assertAlmostEqual(actual[key], value)

    # All correct predictions.
    result = corpusblue_metrics.compute(
        ['This is a test.', 'Test two', 'A third test example'],
        ['This is a test.', 'Test two', 'A third test example'],
        types.GeneratedText(), types.GeneratedText())
    _assert_dict_almost_equal(result, {'corpus_bleu': 100.00000000000004})

    # Some incorrect predictions.
    result = corpusblue_metrics.compute(
        ['This is a test.', 'Test one', 'A third test'],
        ['This is a test.', 'Test two', 'A third test example'],
        types.GeneratedText(), types.GeneratedText())
    _assert_dict_almost_equal(result, {'corpus_bleu': 68.037493331712})

    # All incorrect predictions.
    result = corpusblue_metrics.compute(
        ['This is a test.', 'Test one', 'A third test'],
        ['these test.', 'Test two', 'A third test example'],
        types.GeneratedText(), types.GeneratedText())
    _assert_dict_almost_equal(result, {'corpus_bleu': 0.0})

    # Empty labels and predictions should yield an empty metrics dict.
    result = corpusblue_metrics.compute([], [], types.GeneratedText(),
                                        types.GeneratedText())
    _assert_dict_almost_equal(result, {})
def test_compute(self):
    """Checks CorpusBLEU scores for perfect, partial, wrong, and empty inputs."""
    bleu = metrics.CorpusBLEU()

    # (labels, predictions, expected metrics dict)
    cases = [
        # All correct predictions.
        (['This is a test.', 'Test two', 'A third test example'],
         ['This is a test.', 'Test two', 'A third test example'],
         {'corpus_bleu': 100.00000}),
        # Some incorrect predictions.
        (['This is a test.', 'Test one', 'A third test'],
         ['This is a test.', 'Test two', 'A third test example'],
         {'corpus_bleu': 68.037493}),
        # Mostly incorrect predictions.
        (['This is a test.', 'Test one', 'A third test'],
         ['these test.', 'Test two', 'A third test example'],
         {'corpus_bleu': 29.508062388758525}),
        # Empty labels and predictions.
        ([], [], {}),
    ]
    for labels, preds, expected in cases:
        result = bleu.compute(labels, preds, types.GeneratedText(),
                              types.GeneratedText())
        testing_utils.assert_dicts_almost_equal(self, result, expected)
def test_find_spec_keys(self):
    """find_spec_keys filters a spec by LitType class, matching subclasses."""
    spec = {
        "score": types.RegressionScore(),
        "scalar_foo": types.Scalar(),
        "text": types.TextSegment(),
        "emb_0": types.Embeddings(),
        "emb_1": types.Embeddings(),
        "tokens": types.Tokens(),
        "generated_text": types.GeneratedText(),
    }

    # Single-type and tuple-of-types lookups.
    self.assertEqual(
        utils.find_spec_keys(spec, types.RegressionScore), ["score"])
    self.assertEqual(
        utils.find_spec_keys(spec, (types.TextSegment, types.Tokens)),
        ["text", "tokens", "generated_text"])
    self.assertEqual(
        utils.find_spec_keys(spec, types.Embeddings), ["emb_0", "emb_1"])

    # No field of this type: result is empty.
    self.assertEqual(utils.find_spec_keys(spec, types.AttentionHeads), [])

    # Check subclasses: LitType matches everything; base classes match
    # derived field types.
    self.assertEqual(utils.find_spec_keys(spec, types.LitType),
                     list(spec.keys()))
    self.assertEqual(utils.find_spec_keys(spec, types.TextSegment),
                     ["text", "generated_text"])
    self.assertEqual(utils.find_spec_keys(spec, types.Scalar),
                     ["score", "scalar_foo"])
def test_is_compatible(self):
    """MulticlassMetrics accepts only MulticlassPreds fields."""
    metric = metrics.MulticlassMetrics()
    # Only compatible with MulticlassPreds spec.
    self.assertTrue(metric.is_compatible(types.MulticlassPreds(vocab=[''])))
    for incompatible_field in (types.RegressionScore(), types.GeneratedText()):
        self.assertFalse(metric.is_compatible(incompatible_field))
def test_is_compatible(self):
    """CorpusBLEU accepts only GeneratedText fields."""
    metric = metrics.CorpusBLEU()
    # Only compatible with GeneratedText spec.
    self.assertTrue(metric.is_compatible(types.GeneratedText()))
    for incompatible_field in (types.MulticlassPreds(vocab=['']),
                               types.RegressionScore()):
        self.assertFalse(metric.is_compatible(incompatible_field))
def output_spec(self) -> lit_types.Spec:
    """Output fields: input tokens, generated text, and attention maps.

    Emits one attention field per layer plus a cross-layer average; all
    attention aligns input tokens to themselves.
    """
    def _self_attention():
        return lit_types.AttentionHeads(
            align=("input_tokens", "input_tokens"))

    spec = {
        "input_tokens": lit_types.Tokens(parent="input_text"),
        "predicted": lit_types.GeneratedText(parent="target_text"),
        "layer_average": _self_attention(),
    }
    spec.update(
        {f"layer{i}": _self_attention() for i in range(self.ATTENTION_LAYERS)})
    return spec
def output_spec(self):
    """Output spec; per-layer attention fields are added only when enabled."""
    spec = {
        "input_tokens": lit_types.Tokens(parent="input_text"),
        "generation": lit_types.GeneratedText(parent="target_text"),
        "encoder_final_embedding": lit_types.Embeddings(),
        # If target text is given, the following will also be populated.
        "target_tokens": lit_types.Tokens(parent="target_text"),
        "pred_tokens": lit_types.TokenTopKPreds(align="target_tokens"),
        "rougeL": lit_types.Scalar(),
    }
    if self.config.output_attention:
        # One self-attention field per layer on each side of the model.
        for layer_idx in range(self.num_layers):
            for side, token_field in (("encoder", "input_tokens"),
                                      ("decoder", "target_tokens")):
                spec[f"{side}_layer_{layer_idx:d}_attention"] = (
                    lit_types.AttentionHeads(align=(token_field, token_field)))
    return spec
def output_spec(self) -> lit_types.Spec:
    """Spec for a translation model's outputs.

    Covers source/target tokenization, generated text, attention from source
    to target tokens, top-k token predictions, the final encoder embedding,
    and per-example TER / chrF3 scores.
    """
    spec = {}
    spec["src_tokens"] = lit_types.Tokens(parent="src_text")
    spec["trg_text"] = lit_types.GeneratedText(parent="ref_text")
    spec["trg_tokens"] = lit_types.Tokens(parent="trg_text")
    spec["attention"] = lit_types.AttentionHeads(
        align_in="src_tokens", align_out="trg_tokens")
    spec["pred_tokens"] = lit_types.TokenTopKPreds(
        align="trg_tokens", parent="trg_text")
    spec["encoder_final_embedding"] = lit_types.Embeddings()
    # Per-example translation quality scores.
    spec["ter"] = lit_types.Scalar()
    spec["chrf3"] = lit_types.Scalar()
    return spec
def output_spec(self):
    """Output spec; candidate list and attention fields depend on config."""
    spec = {
        "output_text": lit_types.GeneratedText(parent="target_text"),
        "input_tokens": lit_types.Tokens(parent="input_text"),
        "encoder_final_embedding": lit_types.Embeddings(),
        # If target text is given, the following will also be populated.
        "target_tokens": lit_types.Tokens(parent="target_text"),
        "pred_tokens": lit_types.TokenTopKPreds(align="target_tokens"),
    }
    if self.config.num_to_generate > 1:
        # Multiple generations: expose a candidate list rather than one string.
        spec["output_text"] = lit_types.GeneratedTextCandidates(
            parent="target_text")
    if self.config.output_attention:
        # Add attention for each layer; field names are 1-indexed.
        for idx in range(self.num_layers):
            spec[f"encoder_layer_{idx + 1:d}_attention"] = (
                lit_types.AttentionHeads(
                    align_in="input_tokens", align_out="input_tokens"))
            spec[f"decoder_layer_{idx + 1:d}_attention"] = (
                lit_types.AttentionHeads(
                    align_in="target_tokens", align_out="target_tokens"))
    return spec
def output_spec(self):
    """The model emits a single generated-text field aligned to the target."""
    spec = {"output_text": lit_types.GeneratedText(parent="target_text")}
    return spec