import torch

# NOTE: assumes these symbols are exported at the top level of `transformers`;
# adjust the import path if your version organizes them differently.
from transformers import RemBertConfig, RemBertModel, load_tf_weights_in_rembert


def convert_rembert_tf_checkpoint_to_pytorch(tf_checkpoint_path,
                                             bert_config_file,
                                             pytorch_dump_path):
    # Initialise PyTorch model
    config = RemBertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = RemBertModel(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_rembert(model, config, tf_checkpoint_path)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
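
# Sketch of a command-line entry point for the converter above; the flag names
# and help strings are illustrative, not taken from the original script.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--tf_checkpoint_path", type=str, required=True,
                        help="Path to the TensorFlow checkpoint to convert.")
    parser.add_argument("--rembert_config_file", type=str, required=True,
                        help="JSON file with the RemBERT model configuration.")
    parser.add_argument("--pytorch_dump_path", type=str, required=True,
                        help="Where to write the converted PyTorch weights.")
    args = parser.parse_args()
    convert_rembert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path,
                                             args.rembert_config_file,
                                             args.pytorch_dump_path)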

    def create_and_check_model_as_decoder(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        encoder_hidden_states,
        encoder_attention_mask,
    ):
        # Enable cross-attention so the model can attend over the encoder states.
        config.add_cross_attention = True
        model = RemBertModel(config)
        model.to(torch_device)
        model.eval()
        # Forward pass with both encoder hidden states and encoder attention mask.
        result = model(
            input_ids,
            attention_mask=input_mask,
            token_type_ids=token_type_ids,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
        )
        # The encoder attention mask is optional.
        result = model(
            input_ids,
            attention_mask=input_mask,
            token_type_ids=token_type_ids,
            encoder_hidden_states=encoder_hidden_states,
        )
        # Plain forward pass without any encoder inputs.
        result = model(input_ids,
                       attention_mask=input_mask,
                       token_type_ids=token_type_ids)
        self.parent.assertEqual(
            result.last_hidden_state.shape,
            (self.batch_size, self.seq_length, self.hidden_size))

    def create_and_check_model(self, config, input_ids, token_type_ids,
                               input_mask, sequence_labels, token_labels,
                               choice_labels):
        model = RemBertModel(config=config)
        model.to(torch_device)
        model.eval()
        # The attention mask and token type ids are optional inputs.
        result = model(input_ids,
                       attention_mask=input_mask,
                       token_type_ids=token_type_ids)
        result = model(input_ids, token_type_ids=token_type_ids)
        result = model(input_ids)
        self.parent.assertEqual(
            result.last_hidden_state.shape,
            (self.batch_size, self.seq_length, self.hidden_size))
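
    # Sketch of how the checks above are usually driven from a test case; the
    # `model_tester` attribute and its `prepare_config_and_inputs` helper are
    # assumed for illustration and are not defined in this excerpt.
    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)
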
    def test_inference_model(self):
        # Test exact values at the last hidden layer
        model = RemBertModel.from_pretrained("google/rembert")
        input_ids = torch.tensor([[312, 56498, 313, 2125, 313]])
        segment_ids = torch.tensor([[0, 0, 0, 1, 1]])
        output = model(input_ids,
                       token_type_ids=segment_ids,
                       output_hidden_states=True)

        hidden_size = 1152

        expected_shape = torch.Size((1, 5, hidden_size))
        self.assertEqual(output["last_hidden_state"].shape, expected_shape)

        expected_implementation = torch.tensor([[
            [0.0754, -0.2022, 0.1904],
            [-0.3354, -0.3692, -0.4791],
            [-0.2314, -0.6729, -0.0749],
            [-0.0396, -0.3105, -0.4234],
            [-0.1571, -0.0525, 0.5353],
        ]])

        # Running the original TF implementation gives slightly different results here.
        # It is not clear why this variation is present.
        # TODO: Find reason for discrepancy
        # expected_original_implementation = [[
        #     [0.07630594074726105, -0.20146065950393677, 0.19107051193714142],
        #     [-0.3405614495277405, -0.36971670389175415, -0.4808273911476135],
        #     [-0.22587086260318756, -0.6656315922737122, -0.07844287157058716],
        #     [-0.04145475849509239, -0.3077218234539032, -0.42316967248916626],
        #     [-0.15887849032878876, -0.054529931396245956, 0.5356100797653198]
        # ]]

        self.assertTrue(
            torch.allclose(output["last_hidden_state"][:, :, :3],
                           expected_implementation,
                           atol=1e-4))

    def test_model_from_pretrained(self):
        for model_name in REMBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = RemBertModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
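

# Standalone usage sketch for the "google/rembert" checkpoint exercised by the
# tests above; the sentencepiece-based RemBertTokenizer is assumed to be
# available. Illustrative only, not part of the test suite.
def run_rembert_example():
    import torch

    from transformers import RemBertModel, RemBertTokenizer

    tokenizer = RemBertTokenizer.from_pretrained("google/rembert")
    model = RemBertModel.from_pretrained("google/rembert")
    model.eval()
    inputs = tokenizer("RemBERT decouples input and output embeddings.", return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Hidden size is 1152 for google/rembert, matching the integration test above.
    print(outputs.last_hidden_state.shape)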