Beispiel #1
0
 def create_and_check_reformer_model_with_lm_backward(
         self, config, input_ids, input_mask, choice_labels):
     """Run a labelled forward pass and backpropagate the resulting loss.

     No assertion is made on the loss value: the check succeeds as long as
     neither the forward call nor ``backward()`` raises. ``choice_labels``
     is not used here.
     """
     lm_model = ReformerModelWithLMHead(config=config)
     lm_model.to(torch_device)
     # NOTE(review): gradients are computed while the model is in eval mode;
     # confirm this is intended.
     lm_model.eval()
     outputs = lm_model(input_ids, attention_mask=input_mask, labels=input_ids)
     # the first element of the model output is treated as the loss
     lm_loss = outputs[0]
     lm_loss.backward()
 def create_and_check_reformer_model_fp16_generate(self, config, input_ids, input_mask):
     """Check that greedy generation with a ``half()``-converted model yields no NaNs."""
     fp16_model = ReformerModelWithLMHead(config=config)
     fp16_model.to(torch_device)
     fp16_model.half()
     fp16_model.eval()
     generated = fp16_model.generate(input_ids, attention_mask=input_mask, do_sample=False)
     # reduce the element-wise NaN mask to a single Python bool
     contains_nan = torch.isnan(generated).any().item()
     self.parent.assertFalse(contains_nan)
    def create_and_check_past_buckets_states(self, config, input_ids, input_mask, choice_labels):
        """Compare last-position logits computed with and without the cache.

        The input is split into everything but the last position and the last
        position alone. The first part is run with ``use_cache=True`` to obtain
        ``past_buckets_states``; the last position is then run on top of that
        cache and compared (at one randomly chosen index of the last logits
        dimension, ``atol=1e-2``) with the last position of a full forward pass.
        ``input_mask`` and ``choice_labels`` are not used here.
        """
        config.is_decoder = True
        config.lsh_num_chunks_before = 1
        config.lsh_num_chunks_after = 0

        lm_model = ReformerModelWithLMHead(config=config)
        lm_model.to(torch_device)
        lm_model.eval()

        prefix_ids, last_position_ids = input_ids[:, :-1], input_ids[:, -1:]

        # run the prefix once and keep the cache it returns
        prefix_outputs = lm_model(prefix_ids, use_cache=True)
        cache = prefix_outputs["past_buckets_states"]

        # last-position logits: incremental (cached) pass vs. full pass
        cached_outputs = lm_model(last_position_ids, past_buckets_states=cache, use_cache=True)
        cached_logits = cached_outputs["logits"]
        full_logits = lm_model(input_ids)["logits"][:, -1]

        # pick one random index along the last logits dimension
        probe_idx = torch.randint(full_logits.shape[-1], (1, 1), device=torch_device).item()

        # both paths should agree within the tolerance
        logits_match = torch.allclose(
            cached_logits[:, 0, probe_idx], full_logits[:, probe_idx], atol=1e-2
        )
        self.parent.assertTrue(logits_match)
 def create_and_check_reformer_with_lm(self, config, input_ids, input_mask, choice_labels):
     """Check the logits shape of the LM-head model configured as a decoder.

     Asserts that ``result.logits`` has shape
     ``(batch_size, seq_length, vocab_size)`` as stored on the tester.
     ``choice_labels`` is not used here.
     """
     config.is_decoder = True
     config.lsh_num_chunks_after = 0
     lm_model = ReformerModelWithLMHead(config=config)
     lm_model.to(torch_device)
     lm_model.eval()
     result = lm_model(input_ids, attention_mask=input_mask, labels=input_ids)
     expected_shape = (self.batch_size, self.seq_length, self.vocab_size)
     self.parent.assertEqual(result.logits.shape, expected_shape)
 def create_and_check_reformer_no_chunking(self, config, input_ids, input_mask, choice_labels):
     """Check the forward pass when the chunk lengths exceed the input length.

     Both attention chunk lengths are set to twice the last dimension of
     ``input_ids``; the second dimension of the first model output must then
     equal the last dimension of ``input_ids``. ``choice_labels`` is not
     used here.
     """
     input_length = input_ids.shape[-1]
     # force the chunk length to be bigger than the input
     oversized_chunk_length = 2 * input_length
     config.lsh_attn_chunk_length = oversized_chunk_length
     config.local_attn_chunk_length = oversized_chunk_length

     lm_model = ReformerModelWithLMHead(config=config)
     lm_model.to(torch_device)
     lm_model.eval()

     outputs = lm_model(input_ids, attention_mask=input_mask)
     logits_length = outputs[0].shape[1]
     self.parent.assertTrue(logits_length == input_ids.shape[-1])
 def create_and_check_reformer_model_fp16_generate(self, config, input_ids, input_mask, choice_labels):
     """Check that greedy generation with a ``half()``-converted decoder yields no NaNs.

     Only the last 10 positions of ``input_ids`` are used as the prompt. The
     attention mask is cut to the same positions so that prompt and mask
     have matching shapes; a ``None`` mask is passed through unchanged.
     ``choice_labels`` is not used here.
     """
     config.is_decoder = True
     config.lsh_num_chunks_after = 0
     model = ReformerModelWithLMHead(config=config)
     model.to(torch_device)
     model.half()
     model.eval()
     # only use last 10 inputs for generation
     prompt_length = 10
     prompt_ids = input_ids[:, -prompt_length:]
     # keep the mask aligned with the shortened prompt (it may be absent)
     prompt_mask = input_mask[:, -prompt_length:] if input_mask is not None else None
     output = model.generate(prompt_ids, attention_mask=prompt_mask, do_sample=False)
     self.parent.assertFalse(torch.isnan(output).any().item())
    def create_and_check_reformer_model_generate(self, config, input_ids, input_mask, choice_labels):
        """Check that ``generate()`` called without arguments returns a value.

        The config is switched to decoder mode with ``bos_token_id=0``, no EOS
        token and ``max_length=20`` before the model is built. The input
        arguments are not used here.
        """
        config_overrides = (
            ("is_decoder", True),
            ("lsh_num_chunks_after", 0),
            ("bos_token_id", 0),
            ("eos_token_id", None),
            ("max_length", 20),
        )
        for attr_name, attr_value in config_overrides:
            setattr(config, attr_name, attr_value)

        lm_model = ReformerModelWithLMHead(config=config)
        lm_model.to(torch_device)
        lm_model.eval()
        generated = lm_model.generate()
        self.parent.assertIsNotNone(generated)
 def create_and_check_reformer_with_lm(self, config, input_ids, input_mask, choice_labels):
     """Check the prediction-score shape and the loss of the LM-head model.

     The model output is unpacked into ``(loss, prediction_scores)``. The
     scores must have size ``[batch_size, seq_length, vocab_size]``, and the
     collected results are handed to ``check_loss_output``.
     ``choice_labels`` is not used here.
     """
     lm_model = ReformerModelWithLMHead(config=config)
     lm_model.to(torch_device)
     lm_model.eval()
     loss, prediction_scores = lm_model(input_ids, attention_mask=input_mask, labels=input_ids)
     result = dict(loss=loss, prediction_scores=prediction_scores)
     expected_size = [self.batch_size, self.seq_length, self.vocab_size]
     self.parent.assertListEqual(list(result["prediction_scores"].size()), expected_size)
     self.check_loss_output(result)
 def test_lm_model_forward(self):
     """Compare a slice of the LM-head model output with stored reference values.

     The model is built after seeding torch with 0, with alternating
     local/LSH attention layers, ``num_buckets=[2, 4]`` and
     ``is_decoder=False``. The first five values at ``[1, -1]`` of the first
     output must match the reference within ``atol=1e-3``.
     """
     config = self._get_basic_config_and_input()
     config["attn_layers"] = ["local", "lsh"] * 3
     config["num_buckets"] = [2, 4]
     config["is_decoder"] = False

     # seed before the model is constructed
     torch.manual_seed(0)
     reformer_config = ReformerConfig(**config)
     model = ReformerModelWithLMHead(reformer_config).to(torch_device)
     model.eval()

     input_ids, attn_mask = self._get_input_ids_and_mask()
     outputs = model(input_ids=input_ids, attention_mask=attn_mask)
     output_slice = outputs[0][1, -1, :5]

     reference_values = [0.0324, -0.0121, 0.0615, 0.0031, -0.0297]
     expected_output_slice = torch.tensor(reference_values, dtype=torch.float, device=torch_device)
     slices_match = torch.allclose(output_slice, expected_output_slice, atol=1e-3)
     self.assertTrue(slices_match)
Beispiel #10
0
                        attention_mask=attention_mask,
                        labels=labels)
        loss, prediction_scores = outputs[:2]
        loss.backward()

        training_loss[f"Epoch {x} Step {i}"] = loss.item()
        all_training_loss[f"Epoch {x} Step {i}"] = loss.item()
        print(f'training loss: {loss.item()}')

        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)

        optimizer.step()
        optimizer.zero_grad()

        if i % VALIDATE_EVERY == 0:
            model.eval()
            with torch.no_grad():
                tmp = next(val_loader)
                input_ids = tmp['input_ids']
                attention_mask = tmp['attention_mask']
                labels = tmp['labels']

                outputs = model(input_ids,
                                attention_mask=attention_mask,
                                labels=labels)
                loss, prediction_scores = outputs[:2]

                val_loss[f"Epoch {x} Step {i}"] = loss.item()
                all_val_loss[f"Epoch {x} Step {i}"] = loss.item()
                print(f'validation loss: {loss.item()}')