def test_logits(self): model = TFOPTForCausalLM.from_pretrained(self.path_model) tokenizer = GPT2Tokenizer.from_pretrained(self.path_model) prompts = [ "Today is a beautiful day and I want to", "In the city of", "Paris is the capital of France and", "Computers and mobile phones have taken", ] # verify that prompt without BOS token is identical to Metaseq -> add_special_tokens=False inputs = tokenizer(prompts, return_tensors="tf", padding=True, add_special_tokens=False) logits = tf.math.reduce_mean(model(inputs.input_ids, attention_mask=inputs.attention_mask)[0], axis=-1) logits_meta = tf.constant( [ [1.3851, -13.8923, -10.5229, -10.7533, -0.2309, -10.2384, -0.5365, -9.0947, -5.1670], [-4.7073, -10.6276, -3.9415, -21.5242, -0.2822, -0.2822, -0.2822, -0.2822, -0.2822], [0.6247, -3.4229, -8.9179, -1.4297, -14.1650, 1.4146, -9.0218, -0.2703, -0.2703], [6.4783, -1.9913, -10.7926, -2.3336, 1.5092, -0.9974, -6.8213, 1.3477, 1.3477], ] ) self.assertTrue(np.allclose(logits, logits_meta, atol=1e-4)) xla_generate = tf.function(model, jit_compile=True) logits = tf.math.reduce_mean(xla_generate(inputs.input_ids, attention_mask=inputs.attention_mask)[0], axis=-1) self.assertTrue(np.allclose(logits, logits_meta, atol=1e-4))
def test_batch_generation(self): model_id = "facebook/opt-350m" tokenizer = GPT2Tokenizer.from_pretrained(model_id) model = TFOPTForCausalLM.from_pretrained(model_id) tokenizer.padding_side = "left" # use different length sentences to test batching sentences = [ "Hello, my dog is a little", "Today, I", ] inputs = tokenizer(sentences, return_tensors="tf", padding=True) input_ids = inputs["input_ids"] outputs = model.generate(input_ids=input_ids, attention_mask=inputs["attention_mask"]) inputs_non_padded = tokenizer(sentences[0], return_tensors="tf").input_ids output_non_padded = model.generate(input_ids=inputs_non_padded) num_paddings = inputs_non_padded.shape[-1] - tf.math.reduce_sum( tf.cast(inputs["attention_mask"][-1], tf.int64)) inputs_padded = tokenizer(sentences[1], return_tensors="tf").input_ids output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings) batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True) non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True) padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True) expected_output_sentence = [ "Hello, my dog is a little bit of a dork.\nI'm a little bit", "Today, I was in the middle of a conversation with a friend about the", ] self.assertListEqual(expected_output_sentence, batch_out_sentence) self.assertListEqual(batch_out_sentence, [non_padded_sentence, padded_sentence])
def test_generation_post_attn_layer_norm(self): model_id = "facebook/opt-350m" EXPECTED_OUTPUTS = [ "Today is a beautiful day and I want to", "In the city of San Francisco, the city", "Paris is the capital of France and the capital", "Computers and mobile phones have taken over the", ] predicted_outputs = [] tokenizer = GPT2Tokenizer.from_pretrained(model_id) model = TFOPTForCausalLM.from_pretrained(model_id) for prompt in self.prompts: input_ids = tokenizer(prompt, return_tensors="tf").input_ids generated_ids = model.generate(input_ids, max_length=10) generated_string = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) predicted_outputs += generated_string self.assertListEqual(predicted_outputs, EXPECTED_OUTPUTS)