def test_xglm_sample_max_time(self): tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M") model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M") model.to(torch_device) torch.manual_seed(0) tokenized = tokenizer("Today is a nice day and", return_tensors="pt") input_ids = tokenized.input_ids.to(torch_device) MAX_TIME = 0.15 start = datetime.datetime.now() model.generate(input_ids, do_sample=True, max_time=MAX_TIME, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME)) self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) start = datetime.datetime.now() model.generate(input_ids, do_sample=False, max_time=MAX_TIME, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME)) self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) start = datetime.datetime.now() model.generate(input_ids, do_sample=False, num_beams=2, max_time=MAX_TIME, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME)) self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) start = datetime.datetime.now() model.generate(input_ids, do_sample=True, num_beams=2, max_time=MAX_TIME, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME)) self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) start = datetime.datetime.now() model.generate(input_ids, do_sample=False, max_time=None, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=1.25 * MAX_TIME))
def test_xglm_sample(self): tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M") model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M") torch.manual_seed(0) tokenized = tokenizer("Today is a nice day and", return_tensors="pt") input_ids = tokenized.input_ids output_ids = model.generate(input_ids, do_sample=True, num_beams=1) output_str = tokenizer.decode(output_ids[0], skip_special_tokens=True) EXPECTED_OUTPUT_STR = "Today is a nice day and the sun is shining. A nice day with warm rainy" self.assertEqual(output_str, EXPECTED_OUTPUT_STR)
def test_xglm_sample(self): tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M") model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M") model.to(torch_device) torch.manual_seed(0) tokenized = tokenizer("Today is a nice day and", return_tensors="pt") input_ids = tokenized.input_ids.to(torch_device) output_ids = model.generate(input_ids, do_sample=True, num_beams=1) output_str = tokenizer.decode(output_ids[0], skip_special_tokens=True) EXPECTED_OUTPUT_STR = "Today is a nice day and I am happy to show you all about a recent project for my" self.assertEqual(output_str, EXPECTED_OUTPUT_STR)
def test_batch_generation(self): model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M") model.to(torch_device) tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M") tokenizer.padding_side = "left" # use different length sentences to test batching sentences = [ "Hello, my dog is a little", "Today, I", ] inputs = tokenizer(sentences, return_tensors="pt", padding=True) input_ids = inputs["input_ids"].to(torch_device) outputs = model.generate( input_ids=input_ids, attention_mask=inputs["attention_mask"].to(torch_device), ) inputs_non_padded = tokenizer( sentences[0], return_tensors="pt").input_ids.to(torch_device) output_non_padded = model.generate(input_ids=inputs_non_padded) num_paddings = inputs_non_padded.shape[-1] - inputs["attention_mask"][ -1].long().sum().cpu().item() inputs_padded = tokenizer( sentences[1], return_tensors="pt").input_ids.to(torch_device) output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings) batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True) non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True) padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True) expected_output_sentence = [ "Hello, my dog is a little bit of a shy one, but he is very friendly", "Today, I am going to share with you a few of my favorite things", ] self.assertListEqual(expected_output_sentence, batch_out_sentence) self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])
def _test_lm_generate_xglm_helper( self, gradient_checkpointing=False, verify_outputs=True, ): model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M") if gradient_checkpointing: model.gradient_checkpointing_enable() else: model.gradient_checkpointing_disable() model.to(torch_device) input_ids = torch.tensor([[2, 268, 9865]], dtype=torch.long, device=torch_device) # The dog # </s> The dog is a very friendly dog. He is very affectionate and loves to play with other # fmt: off expected_output_ids = [2, 268, 9865, 67, 11, 1988, 57252, 9865, 5, 984, 67, 1988, 213838, 1658, 53, 70446, 33, 6657, 278, 1581] # fmt: on output_ids = model.generate(input_ids, do_sample=False, num_beams=1) if verify_outputs: self.assertListEqual(output_ids[0].tolist(), expected_output_ids)