def create_and_check_reformer_model_with_lm_backward(self, config, input_ids, input_mask, choice_labels):
    """Run a labelled forward pass and backpropagate the resulting loss.

    Nothing is asserted on values; the check passes as long as neither the
    forward call nor ``loss.backward()`` raises.

    Args:
        config: Configuration handed to ``ReformerModelWithLMHead``.
        input_ids: Token ids; also reused as the labels.
        input_mask: Forwarded to the model as ``attention_mask``.
        choice_labels: Not used by this check.
    """
    lm_model = ReformerModelWithLMHead(config=config)
    lm_model.to(torch_device)
    # The model is switched to eval mode before the backward pass.
    lm_model.eval()

    outputs = lm_model(input_ids, attention_mask=input_mask, labels=input_ids)
    # The first output element is treated as the loss.
    lm_loss = outputs[0]
    lm_loss.backward()
def create_and_check_reformer_model_fp16_generate(self, config, input_ids, input_mask):
    """Generate with a half-precision model and assert the output has no NaNs.

    Args:
        config: Configuration handed to ``ReformerModelWithLMHead``.
        input_ids: Prompt token ids passed to ``generate``.
        input_mask: Forwarded to ``generate`` as ``attention_mask``.
    """
    fp16_model = ReformerModelWithLMHead(config=config)
    fp16_model.to(torch_device)
    fp16_model.half()
    fp16_model.eval()

    # Sampling is disabled for this call.
    generated = fp16_model.generate(input_ids, attention_mask=input_mask, do_sample=False)

    contains_nan = torch.isnan(generated).any().item()
    self.parent.assertFalse(contains_nan)
def create_and_check_past_buckets_states(self, config, input_ids, input_mask, choice_labels):
    """Check that cached decoding of the last token matches a full forward pass.

    The model first runs on everything but the final token with
    ``use_cache=True``; the returned ``past_buckets_states`` are then used to
    compute the logits of the final token alone.  Those logits are compared,
    at one randomly chosen index of the last dimension, with the last-position
    logits of an uncached pass over the full input.

    Args:
        config: Configuration to mutate and hand to ``ReformerModelWithLMHead``.
        input_ids: Token ids for the full sequence.
        input_mask: Not used by this check.
        choice_labels: Not used by this check.
    """
    config.is_decoder = True
    config.lsh_num_chunks_before = 1
    config.lsh_num_chunks_after = 0

    lm_model = ReformerModelWithLMHead(config=config)
    lm_model.to(torch_device)
    lm_model.eval()

    prefix_ids = input_ids[:, :-1]
    last_token_ids = input_ids[:, -1:]

    # Step 1: run the prefix and keep the cache it returns.
    cache = lm_model(prefix_ids, use_cache=True)["past_buckets_states"]

    # Step 2: last-token logits, once from the cache and once from scratch.
    cached_outputs = lm_model(last_token_ids, past_buckets_states=cache, use_cache=True)
    logits_cached = cached_outputs["logits"]
    logits_full = lm_model(input_ids)["logits"][:, -1]

    # Step 3: compare a single randomly picked index of the last dimension.
    last_dim = logits_full.shape[-1]
    picked_idx = torch.randint(last_dim, (1, 1), device=torch_device).item()

    slices_match = torch.allclose(
        logits_cached[:, 0, picked_idx],
        logits_full[:, picked_idx],
        atol=1e-2,
    )
    self.parent.assertTrue(slices_match)
def create_and_check_reformer_with_lm(self, config, input_ids, input_mask, choice_labels):
    """Check the logits shape of the LM-head model configured as a decoder.

    Args:
        config: Configuration to mutate and hand to ``ReformerModelWithLMHead``.
        input_ids: Token ids; also reused as the labels.
        input_mask: Forwarded to the model as ``attention_mask``.
        choice_labels: Not used by this check.
    """
    config.is_decoder = True
    config.lsh_num_chunks_after = 0

    lm_model = ReformerModelWithLMHead(config=config)
    lm_model.to(torch_device)
    lm_model.eval()

    result = lm_model(input_ids, attention_mask=input_mask, labels=input_ids)

    expected_shape = (self.batch_size, self.seq_length, self.vocab_size)
    self.parent.assertEqual(result.logits.shape, expected_shape)
def create_and_check_reformer_feed_backward_chunking(self, config, input_ids, input_mask, choice_labels):
    """Check that chunking leaves the loss and embedding gradients unchanged.

    Two models are built from the same seed: one with the incoming config and
    one with ``chunk_size_lm_head`` and ``chunk_size_feed_forward`` set to 1.
    Each runs one training-mode forward/backward pass.  The losses and small
    slices of the word- and position-embedding gradients must agree within
    ``atol=1e-3``.

    Does nothing when ``self.is_training`` is falsy.

    Args:
        config: Configuration to mutate and hand to ``ReformerModelWithLMHead``.
        input_ids: Token ids; also reused as the labels.
        input_mask: Forwarded to the model as ``attention_mask``.
        choice_labels: Not used by this check.
    """
    if not self.is_training:
        return

    # disable dropout
    config.hidden_dropout_prob = 0
    config.local_attention_probs_dropout_prob = 0
    config.lsh_attention_probs_dropout_prob = 0

    def _loss_and_grad_slices():
        """Build a freshly seeded model, backprop once, return loss and grad slices."""
        # Re-seed so both models start from the same RNG state.
        torch.manual_seed(0)
        model = ReformerModelWithLMHead(config=config)
        model.to(torch_device)
        model.train()
        model.zero_grad()

        loss = model(input_ids, labels=input_ids, attention_mask=input_mask)[0]
        loss.backward()

        embeddings = model.reformer.embeddings
        position_weights = embeddings.position_embeddings.weights
        # Read ``.grad`` of the position parameters: the parameter values
        # themselves are identical for equally seeded models, so comparing
        # them would not test the backward pass at all.
        return (
            loss,
            embeddings.word_embeddings.weight.grad[0, :5],
            position_weights[0].grad[1, 0, -5:],
            position_weights[1].grad[0, 1, :5],
        )

    (
        loss_no_chunk,
        grad_slice_word_no_chunk,
        grad_slice_position_factor_1_no_chunk,
        grad_slice_position_factor_2_no_chunk,
    ) = _loss_and_grad_slices()

    config.chunk_size_lm_head = 1
    config.chunk_size_feed_forward = 1

    (
        loss_chunk,
        grad_slice_word_chunk,
        grad_slice_position_factor_1_chunk,
        grad_slice_position_factor_2_chunk,
    ) = _loss_and_grad_slices()

    self.parent.assertTrue(torch.allclose(loss_chunk, loss_no_chunk, atol=1e-3))
    self.parent.assertTrue(torch.allclose(grad_slice_word_no_chunk, grad_slice_word_chunk, atol=1e-3))
    self.parent.assertTrue(
        torch.allclose(grad_slice_position_factor_1_chunk, grad_slice_position_factor_1_no_chunk, atol=1e-3)
    )
    self.parent.assertTrue(
        torch.allclose(grad_slice_position_factor_2_chunk, grad_slice_position_factor_2_no_chunk, atol=1e-3)
    )
def create_and_check_reformer_no_chunking(self, config, input_ids, input_mask, choice_labels):
    """Run the model with attention chunk lengths larger than the input.

    Both attention chunk lengths are set to twice the size of the last
    dimension of ``input_ids``; the second dimension of the first model
    output must then equal that size.

    Args:
        config: Configuration to mutate and hand to ``ReformerModelWithLMHead``.
        input_ids: Token ids fed to the model.
        input_mask: Forwarded to the model as ``attention_mask``.
        choice_labels: Not used by this check.
    """
    input_length = input_ids.shape[-1]

    # force chunk length to be bigger than input_ids
    oversized_chunk = 2 * input_length
    config.lsh_attn_chunk_length = oversized_chunk
    config.local_attn_chunk_length = oversized_chunk

    lm_model = ReformerModelWithLMHead(config=config)
    lm_model.to(torch_device)
    lm_model.eval()

    outputs = lm_model(input_ids, attention_mask=input_mask)
    output_logits = outputs[0]

    length_matches = output_logits.shape[1] == input_length
    self.parent.assertTrue(length_matches)
def create_and_check_reformer_model_fp16_generate(self, config, input_ids, input_mask, choice_labels):
    """Generate with a half-precision decoder and assert the output has no NaNs.

    Only the last 10 positions of ``input_ids`` are used as the prompt.  The
    attention mask is truncated the same way so that it lines up with the
    prompt it is passed alongside.

    Args:
        config: Configuration to mutate and hand to ``ReformerModelWithLMHead``.
        input_ids: Token ids; the trailing 10 positions form the prompt.
        input_mask: Attention mask for ``input_ids``, or ``None``.
        choice_labels: Not used by this check.
    """
    config.is_decoder = True
    config.lsh_num_chunks_after = 0

    model = ReformerModelWithLMHead(config=config)
    model.to(torch_device)
    model.half()
    model.eval()

    # only use last 10 inputs for generation
    prompt_ids = input_ids[:, -10:]
    # Slice the mask identically; passing the full-width mask with a
    # truncated prompt would hand ``generate`` mismatched inputs.
    prompt_mask = input_mask[:, -10:] if input_mask is not None else None

    output = model.generate(prompt_ids, attention_mask=prompt_mask, do_sample=False)
    self.parent.assertFalse(torch.isnan(output).any().item())
def create_and_check_reformer_model_generate(self, config, input_ids, input_mask, choice_labels):
    """Call ``generate()`` without arguments and assert it returns something.

    Generation relies entirely on the config values set here; none of the
    tensor arguments are passed to the model.

    Args:
        config: Configuration to mutate and hand to ``ReformerModelWithLMHead``.
        input_ids: Not used by this check.
        input_mask: Not used by this check.
        choice_labels: Not used by this check.
    """
    generation_overrides = {
        "is_decoder": True,
        "lsh_num_chunks_after": 0,
        "bos_token_id": 0,
        "eos_token_id": None,
        "max_length": 20,
    }
    for attr_name, attr_value in generation_overrides.items():
        setattr(config, attr_name, attr_value)

    lm_model = ReformerModelWithLMHead(config=config)
    lm_model.to(torch_device)
    lm_model.eval()

    generated = lm_model.generate()
    self.parent.assertIsNotNone(generated)
def create_and_check_reformer_with_lm(self, config, input_ids, input_mask, choice_labels):
    """Check the prediction-score shape and the loss of the LM-head model.

    The model output is unpacked into exactly two values, loss and prediction
    scores.  The score shape is compared with
    ``[batch_size, seq_length, vocab_size]`` and the loss is validated by
    ``self.check_loss_output``.

    Args:
        config: Configuration handed to ``ReformerModelWithLMHead``.
        input_ids: Token ids; also reused as the labels.
        input_mask: Forwarded to the model as ``attention_mask``.
        choice_labels: Not used by this check.
    """
    lm_model = ReformerModelWithLMHead(config=config)
    lm_model.to(torch_device)
    lm_model.eval()

    outputs = lm_model(input_ids, attention_mask=input_mask, labels=input_ids)
    loss, prediction_scores = outputs
    result = {"loss": loss, "prediction_scores": prediction_scores}

    actual_shape = list(result["prediction_scores"].size())
    expected_shape = [self.batch_size, self.seq_length, self.vocab_size]
    self.parent.assertListEqual(actual_shape, expected_shape)

    self.check_loss_output(result)