def test_lm_model_forward(self):
    config = self._get_basic_config_and_input()
    config["attn_layers"] = ["local", "lsh", "local", "lsh", "local", "lsh"]
    config["num_buckets"] = [2, 4]
    config["is_decoder"] = False
    torch.manual_seed(0)
    model = ReformerForMaskedLM(ReformerConfig(**config)).to(torch_device)
    model.eval()
    input_ids, attn_mask = self._get_input_ids_and_mask()
    hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
    output_slice = hidden_states[1, -1, :5]
    expected_output_slice = torch.tensor(
        [0.0256, -0.0121, 0.0636, 0.0024, -0.0393],
        dtype=torch.float,
        device=torch_device,
    )
    self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
def masked_mlm():
    import torch
    from transformers import ReformerConfig, ReformerForMaskedLM
    from reformer_utils import encode, decode  # char-level helpers, see the enwik8 script below

    config = ReformerConfig.from_pretrained('google/reformer-enwik8')
    config.is_decoder = False
    model = ReformerForMaskedLM.from_pretrained('google/reformer-enwik8', config=config)

    sentence = sentence2 = "The quick brown fox jumps over the lazy dog."
    input_ids, attention_masks = encode([sentence])

    if True:
        # hide characters 19, 27 and 37 by zeroing their attention-mask entries
        _input_ids, a = input_ids.clone(), attention_masks.clone()
        for i in [19, 27, 37]:
            a[0, i] = 0
            sentence2 = sentence2[:i] + "%" + sentence2[i + 1:]
    else:
        _input_ids, a = input_ids, attention_masks

    f = model.forward(
        input_ids=_input_ids,
        position_ids=None,
        attention_mask=a,
        head_mask=None,
        inputs_embeds=None,
        num_hashes=None,
        labels=_input_ids,
    )
    prediction = decode(torch.argmax(f.logits, 2))[0]
    print(sentence2)
    print(prediction)
def create_and_check_reformer_with_mlm(self, config, input_ids, input_mask, choice_labels):
    config.is_decoder = False
    model = ReformerForMaskedLM(config=config)
    model.to(torch_device)
    model.eval()
    result = model(input_ids, attention_mask=input_mask, labels=input_ids)
    self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
def create_and_check_reformer_model_with_lm_backward(self, config, input_ids, input_mask, choice_labels):
    config.is_decoder = False
    config.lsh_num_chunks_after = 1
    model = ReformerForMaskedLM(config=config)
    model.to(torch_device)
    model.eval()
    loss = model(input_ids, attention_mask=input_mask, labels=input_ids)[0]
    loss.backward()
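# The test above only checks that the backward pass runs inside the shared test harness.
# A standalone sketch of the same idea with a deliberately tiny, hypothetical ReformerConfig
# (all sizes chosen here purely for illustration). The constraints to keep in mind:
# axial_pos_shape must multiply to the training sequence length, axial_pos_embds_dim must
# sum to hidden_size, and the sequence length must be a multiple of the attention chunk lengths.
import torch
from transformers import ReformerConfig, ReformerForMaskedLM

config = ReformerConfig(
    vocab_size=320,
    hidden_size=32,
    num_attention_heads=2,
    attention_head_size=16,
    feed_forward_size=64,
    attn_layers=["local", "lsh"],
    local_attn_chunk_length=16,
    lsh_attn_chunk_length=16,
    lsh_num_chunks_after=1,        # bidirectional LSH attention, as in the test above
    num_buckets=2,
    axial_pos_shape=(8, 8),        # 8 * 8 == sequence length used below
    axial_pos_embds_dim=(16, 16),  # must sum to hidden_size
    is_decoder=False,
)

model = ReformerForMaskedLM(config)
model.train()

input_ids = torch.randint(0, config.vocab_size, (2, 64))
loss = model(input_ids, labels=input_ids).loss
loss.backward()  # gradients are now populated, e.g. on the word embeddings
print(loss.item())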
def create_and_check_reformer_no_chunking(self, config, input_ids, input_mask, choice_labels):
    # force chunk length to be bigger than input_ids
    config.lsh_attn_chunk_length = 2 * input_ids.shape[-1]
    config.local_attn_chunk_length = 2 * input_ids.shape[-1]
    config.lsh_num_chunks_after = 1
    config.is_decoder = False
    model = ReformerForMaskedLM(config=config)
    model.to(torch_device)
    model.eval()
    output_logits = model(input_ids, attention_mask=input_mask)["logits"]
    self.parent.assertTrue(output_logits.shape[1] == input_ids.shape[-1])
def create_and_check_reformer_with_mlm(self, config, input_ids, input_mask, choice_labels):
    config.is_decoder = False
    model = ReformerForMaskedLM(config=config)
    model.to(torch_device)
    model.eval()
    loss, prediction_scores = model(input_ids, attention_mask=input_mask, labels=input_ids)
    result = {
        "loss": loss,
        "prediction_scores": prediction_scores,
    }
    self.parent.assertListEqual(
        list(result["prediction_scores"].size()),
        [self.batch_size, self.seq_length, self.vocab_size],
    )
    self.check_loss_output(result)
from pathlib import Path

import torch
from transformers import (
    ReformerConfig,
    ReformerForMaskedLM,
    ReformerTokenizer,
    LineByLineTextDataset,
    DataCollatorForLanguageModeling,
)

from reformer_utils import encode, decode, CharTokenizer
from general_tools.utils import get_root

ROOT = get_root("internn")
MSK = 44  # id written into masked positions

config = ReformerConfig.from_pretrained('google/reformer-enwik8')
config.is_decoder = False
model = ReformerForMaskedLM.from_pretrained('google/reformer-enwik8', config=config)

sentence = "The quick brown fox jumps over the lazy dog."
input_ids, attention_masks = encode([sentence])
label_ids, _ = encode([sentence])

# overwrite a few characters with the mask id and hide them from attention
for idx in [10, 21, 26, 32, 35]:
    input_ids[0, idx] = MSK
    attention_masks[0, idx] = 0

f = model.forward(
    input_ids=input_ids,
    position_ids=None,
    attention_mask=attention_masks,
    head_mask=None,
    inputs_embeds=None,
    num_hashes=None,
    labels=label_ids,
)
loss = f.loss
prediction = decode(torch.argmax(f.logits, 2))[0]
print(prediction)
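# The encode/decode helpers imported from reformer_utils above are not shown in this file.
# A minimal sketch, assuming the char-level scheme used by google/reformer-enwik8 (each byte
# shifted by +2 so ids 0 and 1 stay free for padding/special use), similar to the helpers
# published with that checkpoint; the actual reformer_utils code may differ in detail.
import torch

def encode(list_of_strings, pad_token_id=0):
    max_length = max(len(string) for string in list_of_strings)
    attention_masks = torch.zeros((len(list_of_strings), max_length), dtype=torch.long)
    input_ids = torch.full((len(list_of_strings), max_length), pad_token_id, dtype=torch.long)
    for idx, string in enumerate(list_of_strings):
        if not isinstance(string, bytes):
            string = str.encode(string)
        input_ids[idx, : len(string)] = torch.tensor([x + 2 for x in string])
        attention_masks[idx, : len(string)] = 1
    return input_ids, attention_masks

def decode(outputs_ids):
    # drop ids 0/1 and shift the rest back to raw characters
    return ["".join(chr(x - 2) if x > 1 else "" for x in output_ids)
            for output_ids in outputs_ids.tolist()]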
def create_and_check_reformer_feed_backward_chunking(self, config, input_ids, input_mask, choice_labels):
    if not self.is_training:
        return

    # disable dropout
    config.hidden_dropout_prob = 0
    config.local_attention_probs_dropout_prob = 0
    config.lsh_attention_probs_dropout_prob = 0
    config.lsh_num_chunks_after = 1
    config.is_decoder = False

    torch.manual_seed(0)
    model = ReformerForMaskedLM(config=config)
    model.to(torch_device)
    model.train()
    model.zero_grad()
    loss_no_chunk, output_no_chunk = model(input_ids, labels=input_ids, attention_mask=input_mask)[:2]
    loss_no_chunk.backward()
    grad_slice_word_no_chunk = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
    grad_slice_position_factor_1_no_chunk = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
    grad_slice_position_factor_2_no_chunk = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]

    config.chunk_size_lm_head = 1
    config.chunk_size_feed_forward = 1

    torch.manual_seed(0)
    model = ReformerForMaskedLM(config=config)
    model.to(torch_device)
    model.train()
    model.zero_grad()
    loss_chunk, output_chunk = model(input_ids, labels=input_ids, attention_mask=input_mask)[:2]
    loss_chunk.backward()
    grad_slice_word_chunk = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
    grad_slice_position_factor_1_chunk = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
    grad_slice_position_factor_2_chunk = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]

    self.parent.assertTrue(torch.allclose(loss_chunk, loss_no_chunk, atol=1e-3))
    self.parent.assertTrue(torch.allclose(grad_slice_word_no_chunk, grad_slice_word_chunk, atol=1e-3))
    self.parent.assertTrue(
        torch.allclose(grad_slice_position_factor_1_chunk, grad_slice_position_factor_1_no_chunk, atol=1e-3)
    )
    self.parent.assertTrue(
        torch.allclose(grad_slice_position_factor_2_chunk, grad_slice_position_factor_2_no_chunk, atol=1e-3)
    )
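# The test above verifies that chunk_size_feed_forward / chunk_size_lm_head only trade memory
# for compute: the position-wise feed-forward is applied to slices of the sequence and the
# outputs are concatenated, so loss and gradients match the unchunked run. A minimal sketch of
# that equivalence using transformers' apply_chunking_to_forward helper (the import path has
# moved between versions, hence the fallback):
import torch
from torch import nn

try:
    from transformers.pytorch_utils import apply_chunking_to_forward  # newer versions
except ImportError:
    from transformers.modeling_utils import apply_chunking_to_forward  # older versions

hidden = torch.randn(2, 64, 256)
feed_forward = nn.Sequential(nn.Linear(256, 512), nn.GELU(), nn.Linear(512, 256))

def feed_forward_chunk(hidden_chunk):
    return feed_forward(hidden_chunk)

full = feed_forward(hidden)
# apply the feed-forward one sequence position at a time (chunk_size=1 along dim=1)
chunked = apply_chunking_to_forward(feed_forward_chunk, 1, 1, hidden)

print(torch.allclose(full, chunked, atol=1e-6))  # expected: True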