Example #1
    def test_lm_model_forward(self):
        config = self._get_basic_config_and_input()
        config["attn_layers"] = ["local", "lsh", "local", "lsh", "local", "lsh"]
        config["num_buckets"] = [2, 4]
        config["is_decoder"] = False
        torch.manual_seed(0)
        model = ReformerForMaskedLM(ReformerConfig(**config)).to(torch_device)
        model.eval()
        input_ids, attn_mask = self._get_input_ids_and_mask()
        hidden_states = model(input_ids=input_ids, attention_mask=attn_mask)[0]
        output_slice = hidden_states[1, -1, :5]
        expected_output_slice = torch.tensor(
            [0.0256, -0.0121, 0.0636, 0.0024, -0.0393], dtype=torch.float, device=torch_device,
        )
        self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
Example #2
    def masked_mlm():
        import torch
        from transformers import ReformerConfig, ReformerForMaskedLM
        from reformer_utils import encode, decode  # same helpers as in Example #7

        config = ReformerConfig.from_pretrained('google/reformer-enwik8')
        config.is_decoder = False
        model = ReformerForMaskedLM.from_pretrained('google/reformer-enwik8',
                                                    config=config)

        sentence = sentence2 = "The quick brown fox jumps over the lazy dog."
        input_ids, attention_masks = encode([sentence])

        # Mask a few character positions by zeroing their attention, and mark
        # the same positions in a copy of the sentence for comparison.
        _input_ids, a = input_ids.clone(), attention_masks.clone()
        for i in [19, 27, 37]:
            a[0, i] = 0
            sentence2 = sentence2[:i] + "%" + sentence2[i + 1:]

        f = model.forward(input_ids=_input_ids,
                          position_ids=None,
                          attention_mask=a,
                          head_mask=None,
                          inputs_embeds=None,
                          num_hashes=None,
                          labels=_input_ids)
        prediction = decode(torch.argmax(f.logits, 2))[0]
        print(sentence2)
        print(prediction)
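
The encode and decode helpers imported from reformer_utils are not shown in these examples. Below is a minimal sketch of what they could look like, assuming the byte-level character scheme of the google/reformer-enwik8 checkpoint (token id = byte value + 2, with ids 0 and 1 reserved for padding); the actual reformer_utils module may differ.

# Hedged sketch of the encode/decode helpers assumed by Examples #2 and #7.
import torch

def encode(list_of_strings, pad_token_id=0):
    # Pad every string to the longest one and build the matching attention mask.
    max_length = max(len(string) for string in list_of_strings)
    attention_masks = torch.zeros((len(list_of_strings), max_length), dtype=torch.long)
    input_ids = torch.full((len(list_of_strings), max_length), pad_token_id, dtype=torch.long)
    for idx, string in enumerate(list_of_strings):
        if not isinstance(string, bytes):
            string = str.encode(string)
        # Assumed id scheme: byte value + 2, ids 0 and 1 reserved.
        input_ids[idx, :len(string)] = torch.tensor([byte + 2 for byte in string])
        attention_masks[idx, :len(string)] = 1
    return input_ids, attention_masks

def decode(output_ids):
    # Map ids back to characters; reserved ids (< 2) are dropped.
    return ["".join(chr(x - 2) if x > 1 else "" for x in ids) for ids in output_ids.tolist()]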
Example #3
    def create_and_check_reformer_with_mlm(self, config, input_ids, input_mask, choice_labels):
        config.is_decoder = False
        model = ReformerForMaskedLM(config=config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=input_mask, labels=input_ids)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
Example #4
    def create_and_check_reformer_model_with_lm_backward(self, config, input_ids, input_mask, choice_labels):
        config.is_decoder = False
        config.lsh_num_chunks_after = 1
        model = ReformerForMaskedLM(config=config)
        model.to(torch_device)
        model.eval()
        loss = model(input_ids, attention_mask=input_mask, labels=input_ids)[0]
        loss.backward()
Example #5
    def create_and_check_reformer_no_chunking(self, config, input_ids, input_mask, choice_labels):
        # force the chunk length to be bigger than the input sequence length
        config.lsh_attn_chunk_length = 2 * input_ids.shape[-1]
        config.local_attn_chunk_length = 2 * input_ids.shape[-1]
        config.lsh_num_chunks_after = 1
        config.is_decoder = False
        model = ReformerForMaskedLM(config=config)
        model.to(torch_device)
        model.eval()
        output_logits = model(input_ids, attention_mask=input_mask)["logits"]
        self.parent.assertTrue(output_logits.shape[1] == input_ids.shape[-1])
Example #6
    def create_and_check_reformer_with_mlm(self, config, input_ids, input_mask, choice_labels):
        config.is_decoder = False
        model = ReformerForMaskedLM(config=config)
        model.to(torch_device)
        model.eval()
        # Take only (loss, prediction_scores) so the unpacking works whether the
        # model returns a plain tuple or a ModelOutput.
        loss, prediction_scores = model(input_ids, attention_mask=input_mask, labels=input_ids)[:2]
        result = {
            "loss": loss,
            "prediction_scores": prediction_scores,
        }
        self.parent.assertListEqual(
            list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size],
        )
        self.check_loss_output(result)
Example #7
from transformers import ReformerConfig, ReformerForMaskedLM, ReformerTokenizer, LineByLineTextDataset, DataCollatorForLanguageModeling
from reformer_utils import encode, decode, CharTokenizer
import torch
from general_tools.utils import get_root
from pathlib import Path
ROOT = get_root("internn")

# Token id written into the masked positions below.
MSK = 44
config = ReformerConfig.from_pretrained('google/reformer-enwik8')
config.is_decoder = False
model = ReformerForMaskedLM.from_pretrained('google/reformer-enwik8', config=config)

sentence = "The quick brown fox jumps over the lazy dog."

input_ids, attention_masks = encode([sentence])
# Keep an unmasked copy of the ids to use as the MLM labels.
label_ids, _ = encode([sentence])

# Mask a few character positions: overwrite the input id and zero its attention.
for idx in [10, 21, 26, 32, 35]:
    input_ids[0, idx] = MSK
    attention_masks[0, idx] = 0

f = model.forward(input_ids=input_ids,
                  position_ids=None,
                  attention_mask=attention_masks,
                  head_mask=None,
                  inputs_embeds=None,
                  num_hashes=None,
                  labels=label_ids)

loss = f.loss
prediction = decode(torch.argmax(f.logits, 2))[0]
print(prediction)
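
A possible follow-up to the script above (assuming the decode helper accepts any 2-D tensor of ids, as in the sketch after Example #2) is to compare the predictions at the masked positions with the original characters.

# Hypothetical usage sketch: inspect only the masked positions.
predicted_ids = torch.argmax(f.logits, dim=2)
for idx in [10, 21, 26, 32, 35]:
    predicted_char = decode(predicted_ids[:, idx:idx + 1])[0]
    target_char = decode(label_ids[:, idx:idx + 1])[0]
    print(idx, repr(predicted_char), "vs", repr(target_char))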
Example #8
    def create_and_check_reformer_feed_backward_chunking(self, config, input_ids, input_mask, choice_labels):
        if not self.is_training:
            return

        # disable dropout
        config.hidden_dropout_prob = 0
        config.local_attention_probs_dropout_prob = 0
        config.lsh_attention_probs_dropout_prob = 0
        config.lsh_num_chunks_after = 1
        config.is_decoder = False

        torch.manual_seed(0)
        model = ReformerForMaskedLM(config=config)
        model.to(torch_device)
        model.train()
        model.zero_grad()
        loss_no_chunk, output_no_chunk = model(input_ids, labels=input_ids, attention_mask=input_mask)[:2]
        loss_no_chunk.backward()
        grad_slice_word_no_chunk = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
        grad_slice_position_factor_1_no_chunk = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
        grad_slice_position_factor_2_no_chunk = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]

        config.chunk_size_lm_head = 1
        config.chunk_size_feed_forward = 1

        torch.manual_seed(0)
        model = ReformerForMaskedLM(config=config)
        model.to(torch_device)
        model.train()
        model.zero_grad()
        loss_chunk, output_chunk = model(input_ids, labels=input_ids, attention_mask=input_mask)[:2]
        loss_chunk.backward()
        grad_slice_word_chunk = model.reformer.embeddings.word_embeddings.weight.grad[0, :5]
        grad_slice_position_factor_1_chunk = model.reformer.embeddings.position_embeddings.weights[0][1, 0, -5:]
        grad_slice_position_factor_2_chunk = model.reformer.embeddings.position_embeddings.weights[1][0, 1, :5]
        self.parent.assertTrue(torch.allclose(loss_chunk, loss_no_chunk, atol=1e-3))
        self.parent.assertTrue(torch.allclose(grad_slice_word_no_chunk, grad_slice_word_chunk, atol=1e-3))
        self.parent.assertTrue(
            torch.allclose(grad_slice_position_factor_1_chunk, grad_slice_position_factor_1_no_chunk, atol=1e-3)
        )
        self.parent.assertTrue(
            torch.allclose(grad_slice_position_factor_2_chunk, grad_slice_position_factor_2_no_chunk, atol=1e-3)
        )
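
Example #8 verifies that enabling chunk_size_lm_head and chunk_size_feed_forward leaves the loss and gradients unchanged: chunking only changes how the position-wise layers are evaluated, not what they compute. The standalone sketch below illustrates that equivalence with a plain feed-forward layer; it is illustrative only and does not use the transformers chunking utilities.

# Illustrative sketch: applying a position-wise feed-forward layer chunk by
# chunk along the sequence dimension matches applying it to the full sequence,
# while reducing peak activation memory.
import torch
import torch.nn as nn

torch.manual_seed(0)
feed_forward = nn.Sequential(nn.Linear(16, 64), nn.GELU(), nn.Linear(64, 16))
hidden_states = torch.randn(2, 8, 16)  # (batch, seq_len, hidden)

full_output = feed_forward(hidden_states)
chunked_output = torch.cat(
    [feed_forward(chunk) for chunk in hidden_states.chunk(8, dim=1)], dim=1
)

assert torch.allclose(full_output, chunked_output, atol=1e-6)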