def convert_rembert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path): # Initialise PyTorch model config = RemBertConfig.from_json_file(bert_config_file) print("Building PyTorch model from configuration: {}".format(str(config))) model = RemBertModel(config) # Load weights from tf checkpoint load_tf_weights_in_rembert(model, config, tf_checkpoint_path) # Save pytorch-model print("Save PyTorch model to {}".format(pytorch_dump_path)) torch.save(model.state_dict(), pytorch_dump_path)
def create_and_check_model_as_decoder( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels, encoder_hidden_states, encoder_attention_mask, ): config.add_cross_attention = True model = RemBertModel(config) model.to(torch_device) model.eval() result = model( input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, encoder_hidden_states=encoder_hidden_states, encoder_attention_mask=encoder_attention_mask, ) result = model( input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, encoder_hidden_states=encoder_hidden_states, ) result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) self.parent.assertEqual( result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
def create_and_check_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): model = RemBertModel(config=config) model.to(torch_device) model.eval() result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) result = model(input_ids, token_type_ids=token_type_ids) result = model(input_ids) self.parent.assertEqual( result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
def test_inference_model(self): # Test exact values at the last hidden layer model = RemBertModel.from_pretrained("google/rembert") input_ids = torch.tensor([[312, 56498, 313, 2125, 313]]) segment_ids = torch.tensor([[0, 0, 0, 1, 1]]) output = model(input_ids, token_type_ids=segment_ids, output_hidden_states=True) hidden_size = 1152 expected_shape = torch.Size((1, 5, hidden_size)) self.assertEqual(output["last_hidden_state"].shape, expected_shape) expected_implementation = torch.tensor([[ [0.0754, -0.2022, 0.1904], [-0.3354, -0.3692, -0.4791], [-0.2314, -0.6729, -0.0749], [-0.0396, -0.3105, -0.4234], [-0.1571, -0.0525, 0.5353], ]]) # Running on the original tf implementation gives slightly different results here. # Not clear why this variations is present # TODO: Find reason for discrepancy # expected_original_implementation = [[ # [0.07630594074726105, -0.20146065950393677, 0.19107051193714142], # [-0.3405614495277405, -0.36971670389175415, -0.4808273911476135], # [-0.22587086260318756, -0.6656315922737122, -0.07844287157058716], # [-0.04145475849509239, -0.3077218234539032, -0.42316967248916626], # [-0.15887849032878876, -0.054529931396245956, 0.5356100797653198] # ]] self.assertTrue( torch.allclose(output["last_hidden_state"][:, :, :3], expected_implementation, atol=1e-4))
def test_model_from_pretrained(self): for model_name in REMBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = RemBertModel.from_pretrained(model_name) self.assertIsNotNone(model)