def convert_tf_checkpoint_to_pytorch(
    tf_checkpoint_path,
    bert_config_file,
    pytorch_dump_path,
):
    """Convert a TensorFlow RoFormer checkpoint into a saved PyTorch state dict.

    Args:
        tf_checkpoint_path: TensorFlow checkpoint location, forwarded to
            ``load_tf_weights_in_roformer``.
        bert_config_file: JSON file from which the ``RoFormerConfig`` is read.
        pytorch_dump_path: Destination handed to ``torch.save`` for the
            model's ``state_dict``.
    """
    # Instantiate the masked-LM model described by the JSON configuration.
    config = RoFormerConfig.from_json_file(bert_config_file)
    print(f"Building PyTorch model from configuration: {config}")
    model = RoFormerForMaskedLM(config)

    # Load the TensorFlow weights; the helper's return value is not used here.
    load_tf_weights_in_roformer(model, config, tf_checkpoint_path)

    # Write out only the state dict, with the new zipfile serialization
    # explicitly turned off.
    print(f"Save PyTorch model to {pytorch_dump_path}")
    state_dict = model.state_dict()
    torch.save(
        state_dict,
        pytorch_dump_path,
        _use_new_zipfile_serialization=False,
    )
def create_and_check_for_masked_lm(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
):
    """Run ``RoFormerForMaskedLM`` once and check the shape of its logits.

    The model is built from ``config``, moved to ``torch_device`` and put in
    eval mode before being called with ``input_ids``, ``input_mask`` (as the
    attention mask), ``token_type_ids`` and ``token_labels`` (as the labels).
    ``sequence_labels`` and ``choice_labels`` are accepted but not used by
    this check.
    """
    model = RoFormerForMaskedLM(config=config)
    model.to(torch_device)
    model.eval()

    outputs = model(
        input_ids,
        attention_mask=input_mask,
        token_type_ids=token_type_ids,
        labels=token_labels,
    )

    # The logits must have shape (batch_size, seq_length, vocab_size),
    # taken from the attributes configured on this tester.
    expected_shape = (self.batch_size, self.seq_length, self.vocab_size)
    self.parent.assertEqual(outputs.logits.shape, expected_shape)
def test_inference_masked_lm(self):
    """Check the pretrained masked-LM model against stored reference values.

    Loads ``junnyu/roformer_chinese_base``, feeds it a fixed six-token input
    and asserts both the shape of the first model output and the values in
    its leading 3x3 corner (absolute tolerance 1e-4).
    """
    model = RoFormerForMaskedLM.from_pretrained("junnyu/roformer_chinese_base")
    input_ids = torch.tensor([[0, 1, 2, 3, 4, 5]])
    first_output = model(input_ids)[0]

    # NOTE(review): the vocab size and reference values are hard-coded and
    # were originally marked with template TODOs -- confirm they are final.
    vocab_size = 50000
    self.assertEqual(first_output.shape, torch.Size((1, 6, vocab_size)))

    expected_slice = torch.tensor(
        [
            [
                [-0.1205, -1.0265, 0.2922],
                [-1.5134, 0.1974, 0.1519],
                [-5.0135, -3.9003, -0.8404],
            ]
        ]
    )
    actual_slice = first_output[:, :3, :3]
    values_match = torch.allclose(actual_slice, expected_slice, atol=1e-4)
    self.assertTrue(values_match)
import torch import tensorflow as tf from transformers import RoFormerForMaskedLM, RoFormerTokenizer, TFRoFormerForMaskedLM text = "今天[MASK]很好,我[MASK]去公园玩。" tokenizer = RoFormerTokenizer.from_pretrained("junnyu/roformer_chinese_base") pt_model = RoFormerForMaskedLM.from_pretrained("junnyu/roformer_chinese_base") tf_model = TFRoFormerForMaskedLM.from_pretrained( "junnyu/roformer_chinese_base", from_pt=True) pt_inputs = tokenizer(text, return_tensors="pt") tf_inputs = tokenizer(text, return_tensors="tf") # pytorch with torch.no_grad(): pt_outputs = pt_model(**pt_inputs).logits[0] pt_outputs_sentence = "pytorch: " for i, id in enumerate(tokenizer.encode(text)): if id == tokenizer.mask_token_id: tokens = tokenizer.convert_ids_to_tokens(pt_outputs[i].topk(k=5)[1]) pt_outputs_sentence += "[" + "||".join(tokens) + "]" else: pt_outputs_sentence += "".join( tokenizer.convert_ids_to_tokens([id], skip_special_tokens=True)) print(pt_outputs_sentence) # tf tf_outputs = tf_model(**tf_inputs, training=False).logits[0] tf_outputs_sentence = "tf: " for i, id in enumerate(tokenizer.encode(text)): if id == tokenizer.mask_token_id: tokens = tokenizer.convert_ids_to_tokens( tf.math.top_k(tf_outputs[i], k=5)[1]) tf_outputs_sentence += "[" + "||".join(tokens) + "]"