def test_gptj_sample(self):
    # Marked as @tooslow due to GPU OOM (issue #13676)
    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B", revision="float16")
    model = GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16", torch_dtype=torch.float16)
    model.to(torch_device)

    torch.manual_seed(0)
    tokenized = tokenizer("Today is a nice day and", return_tensors="pt", return_token_type_ids=True)
    input_ids = tokenized.input_ids.to(torch_device)
    output_ids = model.generate(input_ids, do_sample=True)
    output_str = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    token_type_ids = tokenized.token_type_ids.to(torch_device)
    output_seq = model.generate(input_ids=input_ids, do_sample=True, num_return_sequences=5)
    output_seq_tt = model.generate(
        input_ids=input_ids, token_type_ids=token_type_ids, do_sample=True, num_return_sequences=5
    )
    output_seq_strs = tokenizer.batch_decode(output_seq, skip_special_tokens=True)
    output_seq_tt_strs = tokenizer.batch_decode(output_seq_tt, skip_special_tokens=True)

    if torch_device == "cuda":
        EXPECTED_OUTPUT_STR = (
            "Today is a nice day and I've already been enjoying it. I walked to work with my wife"
        )
    else:
        EXPECTED_OUTPUT_STR = "Today is a nice day and one of those days that feels a bit more alive. I am ready"

    self.assertEqual(output_str, EXPECTED_OUTPUT_STR)
    self.assertTrue(
        all(output_seq_strs[idx] != output_seq_tt_strs[idx] for idx in range(len(output_seq_tt_strs)))
    )  # token_type_ids should change output
def test_gptj_sample_max_time(self):
    tokenizer = AutoTokenizer.from_pretrained("anton-l/gpt-j-tiny-random")
    model = GPTJForCausalLM.from_pretrained("anton-l/gpt-j-tiny-random")
    model.to(torch_device)

    torch.manual_seed(0)
    tokenized = tokenizer("Today is a nice day and", return_tensors="pt", return_token_type_ids=True)
    input_ids = tokenized.input_ids.to(torch_device)

    MAX_TIME = 0.5

    # Sampling should stop shortly after MAX_TIME elapses.
    start = datetime.datetime.now()
    model.generate(input_ids, do_sample=True, max_time=MAX_TIME, max_length=256)
    duration = datetime.datetime.now() - start
    self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
    self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

    # Greedy decoding should respect the same time limit.
    start = datetime.datetime.now()
    model.generate(input_ids, do_sample=False, max_time=MAX_TIME, max_length=256)
    duration = datetime.datetime.now() - start
    self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
    self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

    # Beam search with greedy decoding.
    start = datetime.datetime.now()
    model.generate(input_ids, do_sample=False, num_beams=2, max_time=MAX_TIME, max_length=256)
    duration = datetime.datetime.now() - start
    self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
    self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

    # Beam search with sampling.
    start = datetime.datetime.now()
    model.generate(input_ids, do_sample=True, num_beams=2, max_time=MAX_TIME, max_length=256)
    duration = datetime.datetime.now() - start
    self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
    self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

    # With max_time=None, generation runs to max_length and takes noticeably longer.
    start = datetime.datetime.now()
    model.generate(input_ids, do_sample=False, max_time=None, max_length=256)
    duration = datetime.datetime.now() - start
    self.assertGreater(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))
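# A standalone sketch of the same max_time stopping criterion outside the test
# harness. Assumes the anton-l/gpt-j-tiny-random checkpoint used in the test
# above; the exact output will vary between runs since sampling is enabled.
import torch
from transformers import AutoTokenizer, GPTJForCausalLM

tokenizer = AutoTokenizer.from_pretrained("anton-l/gpt-j-tiny-random")
model = GPTJForCausalLM.from_pretrained("anton-l/gpt-j-tiny-random")
input_ids = tokenizer("Today is a nice day and", return_tensors="pt").input_ids
# Generation halts once roughly 0.5 s of wall-clock time has elapsed,
# even though max_length would allow up to 256 tokens.
output = model.generate(input_ids, do_sample=True, max_time=0.5, max_length=256)
print(tokenizer.decode(output[0], skip_special_tokens=True))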
def test_batch_generation(self):
    # Marked as @tooslow due to GPU OOM
    model = GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16", torch_dtype=torch.float16)
    model.to(torch_device)

    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B", revision="float16")

    tokenizer.padding_side = "left"

    # Define PAD Token = EOS Token = 50256
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id

    # use different length sentences to test batching
    sentences = [
        "Hello, my dog is a little",
        "Today, I",
    ]

    inputs = tokenizer(sentences, return_tensors="pt", padding=True)
    input_ids = inputs["input_ids"].to(torch_device)
    token_type_ids = torch.cat(
        [
            input_ids.new_full((input_ids.shape[0], input_ids.shape[1] - 1), 0),
            input_ids.new_full((input_ids.shape[0], 1), 500),
        ],
        dim=-1,
    )

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=inputs["attention_mask"].to(torch_device),
    )

    outputs_tt = model.generate(
        input_ids=input_ids,
        attention_mask=inputs["attention_mask"].to(torch_device),
        token_type_ids=token_type_ids,
    )

    inputs_non_padded = tokenizer(sentences[0], return_tensors="pt").input_ids.to(torch_device)
    output_non_padded = model.generate(input_ids=inputs_non_padded)

    num_paddings = inputs_non_padded.shape[-1] - inputs["attention_mask"][-1].long().sum().cpu().item()
    inputs_padded = tokenizer(sentences[1], return_tensors="pt").input_ids.to(torch_device)
    output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings)

    batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    batch_out_sentence_tt = tokenizer.batch_decode(outputs_tt, skip_special_tokens=True)
    non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True)
    padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True)

    expected_output_sentence = [
        "Hello, my dog is a little over a year old and has been diagnosed with a heart murmur",
        "Today, I’m going to talk about the most important thing in the",
    ]
    self.assertListEqual(expected_output_sentence, batch_out_sentence)
    self.assertTrue(batch_out_sentence_tt != batch_out_sentence)  # token_type_ids should change output
    self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])
def test_lm_generate_gptj(self):
    # Marked as @tooslow due to GPU OOM
    for checkpointing in [True, False]:
        model = GPTJForCausalLM.from_pretrained(
            "EleutherAI/gpt-j-6B", revision="float16", torch_dtype=torch.float16
        )
        if checkpointing:
            model.gradient_checkpointing_enable()
        else:
            model.gradient_checkpointing_disable()
        model.to(torch_device)

        input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device)  # The dog
        # fmt: off
        # The dog is a man's best friend. It is a loyal companion, and it is a friend
        expected_output_ids = [464, 3290, 318, 257, 582, 338, 1266, 1545, 13, 632, 318, 257, 9112, 15185, 11, 290, 340, 318, 257, 1545]
        # fmt: on
        output_ids = model.generate(input_ids, do_sample=False)
        self.assertListEqual(output_ids[0].tolist(), expected_output_ids)
import torch
from transformers import AutoTokenizer, GPTJForCausalLM

model = GPTJForCausalLM.from_pretrained(
    "~/projects/gpt4chan_model_float16/",
    revision="float16",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model.cuda()
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

prompt = (
    "In a shocking finding, scientists discovered a herd of unicorns living in a remote, "
    "previously unexplored valley, in the Andes Mountains. Even more surprising to the "
    "researchers was the fact that the unicorns spoke perfect English."
)

input_ids = tokenizer(prompt, return_tensors="pt").input_ids
input_ids = input_ids.cuda()

gen_tokens = model.generate(
    input_ids,
    do_sample=True,
    temperature=0.8,
    top_p=0.9,
    max_length=100,
)
gen_text = tokenizer.batch_decode(gen_tokens)[0]
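# To inspect the sampled completion:
print(gen_text)

# A variant sketch: load the official EleutherAI fp16 branch instead of the
# local gpt4chan weights. This mirrors the from_pretrained call used in the
# tests above; the local-path variant keeps revision="float16", which only
# matters when downloading from the Hub.
# model = GPTJForCausalLM.from_pretrained(
#     "EleutherAI/gpt-j-6B", revision="float16", torch_dtype=torch.float16, low_cpu_mem_usage=True
# )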