def create_and_check_gptj_model_attention_mask_past( self, config, input_ids, input_mask, head_mask, token_type_ids, *args): model = TFGPTJModel(config=config) # create attention mask half_seq_length = self.seq_length // 2 attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32) attn_mask_end = tf.zeros( (self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32) attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1) # first forward pass output, past = model(input_ids, attention_mask=attn_mask).to_tuple() # create hypothetical next token and extent to next_input_ids next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) # change a random masked slice from input_ids random_seq_idx_to_change = ids_tensor( (1, ), half_seq_length).numpy() + 1 random_other_next_tokens = ids_tensor( (self.batch_size, self.seq_length), config.vocab_size) vector_condition = tf.range( self.seq_length) == (self.seq_length - random_seq_idx_to_change) condition = tf.transpose( tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size))) input_ids = tf.where(condition, random_other_next_tokens, input_ids) # append to next input_ids and attn_mask next_input_ids = tf.concat([input_ids, next_tokens], axis=-1) attn_mask = tf.concat([ attn_mask, tf.ones((shape_list(attn_mask)[0], 1), dtype=tf.int32) ], axis=1) # get two different outputs output_from_no_past = model( next_input_ids, attention_mask=attn_mask)["last_hidden_state"] output_from_past = model(next_tokens, past=past, attention_mask=attn_mask)["last_hidden_state"] # select random slice random_slice_idx = int( ids_tensor((1, ), shape_list(output_from_past)[-1])) output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx] output_from_past_slice = output_from_past[:, 0, random_slice_idx] # test that outputs are equal for slice tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-12)
def create_and_check_gptj_model_past(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): model = TFGPTJModel(config=config) # first forward pass outputs = model(input_ids, token_type_ids=token_type_ids, use_cache=True) outputs_use_cache_conf = model(input_ids, token_type_ids=token_type_ids) outputs_no_past = model(input_ids, token_type_ids=token_type_ids, use_cache=False) self.parent.assertTrue(len(outputs) == len(outputs_use_cache_conf)) self.parent.assertTrue(len(outputs) == len(outputs_no_past) + 1) output, past = outputs.to_tuple() # create hypothetical next token and extent to next_input_ids next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size) next_token_types = ids_tensor([self.batch_size, 1], self.type_vocab_size) # append to next input_ids and token_type_ids next_input_ids = tf.concat([input_ids, next_tokens], axis=-1) next_token_type_ids = tf.concat([token_type_ids, next_token_types], axis=-1) output_from_no_past = model(next_input_ids, token_type_ids=next_token_type_ids)["last_hidden_state"] output_from_past = model(next_tokens, token_type_ids=next_token_types, past=past)["last_hidden_state"] # select random slice random_slice_idx = int(ids_tensor((1,), shape_list(output_from_past)[-1])) output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx] output_from_past_slice = output_from_past[:, 0, random_slice_idx] # test that outputs are equal for slice tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-6)
def create_and_check_gptj_model_past_large_inputs(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): model = TFGPTJModel(config=config) input_ids = input_ids[:1, :] input_mask = input_mask[:1, :] token_type_ids = token_type_ids[:1, :] self.batch_size = 1 # first forward pass outputs = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, use_cache=True) output, past = outputs.to_tuple() # create hypothetical next token and extent to next_input_ids next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size) next_attn_mask = ids_tensor((self.batch_size, 3), 2) next_token_types = ids_tensor((self.batch_size, 3), self.type_vocab_size) # append to next input_ids and token_type_ids next_input_ids = tf.concat([input_ids, next_tokens], axis=-1) next_attention_mask = tf.concat([input_mask, next_attn_mask], axis=-1) next_token_type_ids = tf.concat([token_type_ids, next_token_types], axis=-1) output_from_no_past = model( next_input_ids, token_type_ids=next_token_type_ids, attention_mask=next_attention_mask)["last_hidden_state"] output_from_past = model(next_tokens, token_type_ids=next_token_types, attention_mask=next_attention_mask, past=past)["last_hidden_state"] self.parent.assertTrue( output_from_past.shape[1] == next_tokens.shape[1]) # select random slice random_slice_idx = int( ids_tensor((1, ), shape_list(output_from_past)[-1])) output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx] output_from_past_slice = output_from_past[:, :, random_slice_idx] # test that outputs are equal for slice tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
def test_batch_left_padding(self): # Confirms that left-padding is working properly model, tokenizer, sentences, expected_output_sentences = self._get_beam_search_test_objects() inputs = tokenizer(sentences, return_tensors="tf", padding=True) inputs_non_padded = tokenizer(sentences[0], return_tensors="tf") output_non_padded = model.generate(**inputs_non_padded, do_sample=False, num_beams=2) num_paddings = ( shape_list(inputs_non_padded["input_ids"])[-1] - tf.reduce_sum(tf.cast(inputs["attention_mask"][-1], tf.int64)).numpy() ) inputs_padded = tokenizer(sentences[1], return_tensors="tf") output_padded = model.generate( **inputs_padded, do_sample=False, num_beams=2, max_length=model.config.max_length - num_paddings ) non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True) padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True) self.assertListEqual(expected_output_sentences, [non_padded_sentence, padded_sentence])
def test_batch_generation(self): # Marked as @tooslow due to GPU OOM model = TFGPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16", from_pt=True) tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B", revision="float16") tokenizer.padding_side = "left" # Define PAD Token = EOS Token = 50256 tokenizer.pad_token = tokenizer.eos_token model.config.pad_token_id = model.config.eos_token_id # use different length sentences to test batching sentences = [ "Hello, my dog is a little", "Today, I", ] inputs = tokenizer(sentences, return_tensors="tf", padding=True) input_ids = inputs["input_ids"] token_type_ids = tf.concat( [ tf.zeros((input_ids.shape[0], input_ids.shape[1] - 1), dtype=tf.int64), 500 * tf.ones((input_ids.shape[0], 1), dtype=tf.int64), ], axis=-1, ) outputs = model.generate(input_ids=input_ids, attention_mask=inputs["attention_mask"]) outputs_tt = model.generate( input_ids=input_ids, attention_mask=inputs["attention_mask"], token_type_ids=token_type_ids, ) inputs_non_padded = tokenizer(sentences[0], return_tensors="tf").input_ids output_non_padded = model.generate(input_ids=inputs_non_padded) num_paddings = (shape_list(inputs_non_padded)[-1] - tf.reduce_sum( tf.cast(inputs["attention_mask"][-1], tf.int64)).numpy()) inputs_padded = tokenizer(sentences[1], return_tensors="tf").input_ids output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings) batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True) batch_out_sentence_tt = tokenizer.batch_decode( outputs_tt, skip_special_tokens=True) non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True) padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True) expected_output_sentence = [ "Hello, my dog is a little over a year old and has been diagnosed with a heart murmur", "Today, I’m going to share with you a few of my favorite", ] self.assertListEqual(expected_output_sentence, batch_out_sentence) self.assertTrue( batch_out_sentence_tt != batch_out_sentence) # token_type_ids should change output self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])