    def test_lm_forward(self):
        config, input_ids, batch_size = self._get_config_and_data(output_past=False)
        decoder_lm_labels = ids_tensor([batch_size, input_ids.shape[1]], self.vocab_size)
        lm_model = BartForMaskedLM(config)
        lm_model.to(torch_device)
        loss, logits, enc_features = lm_model.forward(
            input_ids=input_ids, lm_labels=decoder_lm_labels, decoder_input_ids=input_ids
        )
        expected_shape = (batch_size, input_ids.shape[1], config.vocab_size)
        self.assertEqual(logits.shape, expected_shape)
        self.assertIsInstance(loss.item(), float)
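
These forward tests rely on an ids_tensor helper that is not included in the snippets. The sketch below is an assumption about what such a helper looks like (random token ids of a given shape, bounded by vocab_size), modeled on the utilities commonly used in the transformers test suite; the exact signature there may differ.

import random

import torch


def ids_tensor(shape, vocab_size, rng=None):
    """Return a random torch.long tensor of `shape` with values in [0, vocab_size)."""
    if rng is None:
        rng = random.Random()
    total_dims = 1
    for dim in shape:
        total_dims *= dim
    values = [rng.randint(0, vocab_size - 1) for _ in range(total_dims)]
    # torch_device is the module-level device constant used throughout these tests.
    return torch.tensor(values, dtype=torch.long, device=torch_device).view(shape).contiguous()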
Example 2
    def test_lm_forward(self):
        input_ids = torch.tensor(
            [
                [71, 82, 18, 33, 46, 91, 2],
                [68, 34, 26, 58, 30, 82, 2],
                [5, 97, 17, 39, 94, 40, 2],
                [76, 83, 94, 25, 70, 78, 2],
                [87, 59, 41, 35, 48, 66, 2],
                [55, 13, 16, 58, 5, 2, 1],  # note padding
                [64, 27, 31, 51, 12, 75, 2],
                [52, 64, 86, 17, 83, 39, 2],
                [48, 61, 9, 24, 71, 82, 2],
                [26, 1, 60, 48, 22, 13, 2],
                [21, 5, 62, 28, 14, 76, 2],
                [45, 98, 37, 86, 59, 48, 2],
                [70, 70, 50, 9, 28, 0, 2],
            ],
            dtype=torch.long,
            device=torch_device,
        )
        batch_size = input_ids.shape[0]
        decoder_lm_labels = ids_tensor([batch_size, input_ids.shape[1]],
                                       self.vocab_size)

        config = BartConfig(
            vocab_size=self.vocab_size,
            d_model=24,
            encoder_layers=2,
            decoder_layers=2,
            encoder_attention_heads=2,
            decoder_attention_heads=2,
            encoder_ffn_dim=32,
            decoder_ffn_dim=32,
            max_position_embeddings=48,
        )
        model = BartForSequenceClassification(config)
        model.to(torch_device)
        outputs = model.forward(input_ids=input_ids,
                                decoder_input_ids=input_ids)
        logits = outputs[0]
        expected_shape = torch.Size((batch_size, config.num_labels))
        self.assertEqual(logits.shape, expected_shape)

        lm_model = BartForMaskedLM(config)
        lm_model.to(torch_device)
        loss, logits, enc_features = lm_model.forward(
            input_ids=input_ids,
            lm_labels=decoder_lm_labels,
            decoder_input_ids=input_ids)
        expected_shape = (batch_size, input_ids.shape[1], config.vocab_size)
        self.assertEqual(logits.shape, expected_shape)
        self.assertIsInstance(loss.item(), float)
Example 3
def convert_bart_checkpoint(checkpoint_path, pytorch_dump_folder_path):
    """
    Copy/paste/tweak the fairseq model's weights into our BART structure.
    """
    bart = torch.hub.load("pytorch/fairseq", checkpoint_path)
    bart.eval()  # disable dropout
    bart.model.upgrade_state_dict(bart.model.state_dict())
    hf_model_name = checkpoint_path.replace(".", "-")
    config = BartConfig.from_pretrained(hf_model_name)
    tokens = bart.encode(SAMPLE_TEXT).unsqueeze(0)
    tokens2 = BartTokenizer.from_pretrained(hf_model_name).encode(
        SAMPLE_TEXT, return_tensors="pt").unsqueeze(0)
    assert torch.eq(tokens, tokens2).all()

    if checkpoint_path in ["bart.large", "bart.large.cnn"]:
        state_dict = bart.model.state_dict()
        for k in IGNORE_KEYS:
            state_dict.pop(k, None)
        state_dict["shared.weight"] = state_dict["decoder.embed_tokens.weight"]
        model = BartModel(config)
        their_output = bart.extract_features(tokens)
    else:  # MNLI Case
        state_dict = bart.state_dict()
        for k in IGNORE_KEYS:
            state_dict.pop(k, None)
        state_dict["model.shared.weight"] = state_dict[
            "model.decoder.embed_tokens.weight"]
        for src, dest in rename_keys:
            rename_key(state_dict, src, dest)
        model = BartForSequenceClassification(config)
        their_output = bart.eval("mnli", tokens, return_logits=True)

    # Load state dict
    model.load_state_dict(state_dict)
    model.eval()
    # Check results

    if checkpoint_path == "bart.large.cnn":  # generate doesn't work yet
        model = BartForMaskedLM(config, base_model=model)
        assert "lm_head.weight" in model.state_dict()
        assert model.lm_head.out_features == config.max_position_embeddings
        model.eval()
        our_outputs = model.model.forward(tokens)[0]
    else:
        our_outputs = model.forward(tokens)[0]
    assert their_output.shape == our_outputs.shape
    assert (their_output == our_outputs).all().item()
    Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
    model.save_pretrained(pytorch_dump_folder_path)
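
A minimal usage sketch for the conversion function above. The checkpoint name and output directory are illustrative only; the real script exposes them as command-line arguments, and the call assumes the imports plus the IGNORE_KEYS, rename_keys, and SAMPLE_TEXT globals defined alongside the function.

if __name__ == "__main__":
    # "bart.large.mnli" is one of the fairseq hub names handled by the MNLI branch above;
    # the output folder name is arbitrary and is created if it does not exist.
    convert_bart_checkpoint("bart.large.mnli", "bart-large-mnli-hf")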
Example 4
    def test_generate(self):
        input_ids = torch.Tensor([[71, 82, 2], [68, 34, 2]]).long()
        config = BartConfig(
            vocab_size=self.vocab_size,
            d_model=24,
            encoder_layers=2,
            decoder_layers=2,
            encoder_attention_heads=2,
            decoder_attention_heads=2,
            encoder_ffn_dim=32,
            decoder_ffn_dim=32,
            max_position_embeddings=48,
            output_past=True,
        )
        lm_model = BartForMaskedLM(config)
        new_input_ids = lm_model.generate(input_ids)
        self.assertEqual(new_input_ids.shape, (input_ids.shape[0], 20))

    def test_lm_uneven_forward(self):
        config = BartConfig(
            vocab_size=self.vocab_size,
            d_model=24,
            encoder_layers=2,
            decoder_layers=2,
            encoder_attention_heads=2,
            decoder_attention_heads=2,
            encoder_ffn_dim=32,
            decoder_ffn_dim=32,
            max_position_embeddings=48,
        )
        lm_model = BartForMaskedLM(config)
        context = torch.Tensor([[71, 82, 18, 33, 46, 91, 2], [68, 34, 26, 58, 30, 2, 1]]).long()
        summary = torch.Tensor([[82, 71, 82, 18, 2], [58, 68, 2, 1, 1]]).long()
        logits, enc_features = lm_model.forward(input_ids=context, decoder_input_ids=summary)
        expected_shape = (*summary.shape, config.vocab_size)
        self.assertEqual(logits.shape, expected_shape)
    def test_generate_beam_search(self):
        input_ids = torch.Tensor([[71, 82, 2], [68, 34, 2]]).long()
        config = BartConfig(
            vocab_size=self.vocab_size,
            d_model=24,
            encoder_layers=2,
            decoder_layers=2,
            encoder_attention_heads=2,
            decoder_attention_heads=2,
            encoder_ffn_dim=32,
            decoder_ffn_dim=32,
            max_position_embeddings=48,
            output_past=True,
        )
        lm_model = BartForMaskedLM(config)
        lm_model.eval()

        new_input_ids = lm_model.generate(
            input_ids.clone(), num_return_sequences=1, num_beams=2, no_repeat_ngram_size=3, max_length=5
        )
        self.assertEqual(new_input_ids.shape, (input_ids.shape[0], 5))
Example 7
    def test_generate_fp16(self):
        config, input_ids, batch_size = self._get_config_and_data(output_past=True)
        attention_mask = input_ids.ne(1)
        lm_model = BartForMaskedLM(config).eval().to(torch_device).half()
        lm_model.generate(input_ids, attention_mask=attention_mask)
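
The fp16 test above and the first example both call a _get_config_and_data helper that is not included in these snippets. The sketch below reconstructs a plausible version from the toy inputs and the tiny BartConfig used in Example 2; the helper in the actual test suite may differ in details such as the exact batch contents.

    def _get_config_and_data(self, output_past=False):
        # Same toy batch as in Example 2: 13 sequences of 7 token ids ending in EOS id 2,
        # with id 1 used as padding.
        input_ids = torch.tensor(
            [
                [71, 82, 18, 33, 46, 91, 2],
                [68, 34, 26, 58, 30, 82, 2],
                [5, 97, 17, 39, 94, 40, 2],
                [76, 83, 94, 25, 70, 78, 2],
                [87, 59, 41, 35, 48, 66, 2],
                [55, 13, 16, 58, 5, 2, 1],
                [64, 27, 31, 51, 12, 75, 2],
                [52, 64, 86, 17, 83, 39, 2],
                [48, 61, 9, 24, 71, 82, 2],
                [26, 1, 60, 48, 22, 13, 2],
                [21, 5, 62, 28, 14, 76, 2],
                [45, 98, 37, 86, 59, 48, 2],
                [70, 70, 50, 9, 28, 0, 2],
            ],
            dtype=torch.long,
            device=torch_device,
        )
        batch_size = input_ids.shape[0]
        # Tiny BART configuration matching the dimensions used throughout these tests.
        config = BartConfig(
            vocab_size=self.vocab_size,
            d_model=24,
            encoder_layers=2,
            decoder_layers=2,
            encoder_attention_heads=2,
            decoder_attention_heads=2,
            encoder_ffn_dim=32,
            decoder_ffn_dim=32,
            max_position_embeddings=48,
            output_past=output_past,
        )
        return config, input_ids, batch_size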