def test_TFXLMModel(self):
    """Convert a freshly initialised TFXLMModel to ONNX and compare its
    predictions against onnxruntime within the given tolerances."""
    from transformers import XLMConfig, TFXLMModel

    keras.backend.clear_session()
    # Tokenizer is loaded from a local pickle rather than downloaded.
    tokenizer = self._get_tokenzier('xlm_xlm-mlm-enfr-1024.pickle')
    text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)

    model = TFXLMModel(XLMConfig())
    predictions = model.predict(inputs)

    onnx_model = keras2onnx.convert_keras(model, model.name)
    matched = run_onnx_runtime(
        onnx_model.graph.name, onnx_model, inputs_onnx, predictions,
        self.model_files, rtol=1.e-2, atol=1.e-4)
    self.assertTrue(matched)
def create_and_check_xlm_model(
    self,
    config,
    input_ids,
    token_type_ids,
    input_lengths,
    sequence_labels,
    token_labels,
    is_impossible_labels,
    input_mask,
):
    """Exercise TFXLMModel with both dict and list inputs, then verify the
    sequence output has shape (batch_size, seq_length, hidden_size)."""
    model = TFXLMModel(config=config)

    # First call: dict-style inputs (result intentionally unused — this
    # only checks that the dict input path runs).
    dict_inputs = {
        "input_ids": input_ids,
        "lengths": input_lengths,
        "langs": token_type_ids,
    }
    model(dict_inputs)

    # Second call: list-style inputs; its output is what we assert on.
    list_outputs = model([input_ids, input_mask])
    result = {"sequence_output": list_outputs[0].numpy()}

    expected_shape = [self.batch_size, self.seq_length, self.hidden_size]
    self.parent.assertListEqual(
        list(result["sequence_output"].shape), expected_shape)
def create_and_check_xlm_model(
    self,
    config,
    input_ids,
    token_type_ids,
    input_lengths,
    sequence_labels,
    token_labels,
    is_impossible_labels,
    choice_labels,
    input_mask,
):
    """Run TFXLMModel with dict inputs and then list inputs, asserting that
    last_hidden_state is (batch_size, seq_length, hidden_size)."""
    model = TFXLMModel(config=config)

    # Dict-input call; its result is discarded — it only checks this input
    # path executes without error.
    model({
        "input_ids": input_ids,
        "lengths": input_lengths,
        "langs": token_type_ids,
    })

    # List-input call supplies the result that is actually asserted on.
    result = model([input_ids, input_mask])

    self.parent.assertEqual(
        result.last_hidden_state.shape,
        (self.batch_size, self.seq_length, self.hidden_size),
    )
def __init__(
        self,
        pretrained_model_name_or_path='xlm-mlm-en-2048',
        reduce_output='sum',
        trainable=True,
        num_tokens=None,
        **kwargs
):
    """Build an XLM text encoder backed by a pretrained TFXLMModel.

    :param pretrained_model_name_or_path: model name or local path passed to
        ``TFXLMModel.from_pretrained``.
    :param reduce_output: reduction mode forwarded to ``SequenceReducer``.
    :param trainable: whether the transformer weights are updated in training.
    :param num_tokens: if given, resize the token embedding matrix to this
        vocabulary size; ``None`` leaves the pretrained embeddings untouched.
    :param kwargs: accepted for interface compatibility; unused here.
    """
    super(XLMEncoder, self).__init__()
    try:
        from transformers import TFXLMModel
    except ModuleNotFoundError:
        # transformers is an optional dependency; fail with guidance
        # instead of a raw ImportError traceback.
        logger.error(
            ' transformers is not installed. '
            'In order to install all text feature dependencies run '
            'pip install ludwig[text]'
        )
        sys.exit(-1)
    self.transformer = TFXLMModel.from_pretrained(
        pretrained_model_name_or_path
    )
    self.reduce_output = reduce_output
    self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
    self.transformer.trainable = trainable
    # Fix: only resize when a vocabulary size was actually supplied.
    # The original called resize_token_embeddings(None) unconditionally,
    # relying on the library's internal None handling to no-op.
    if num_tokens is not None:
        self.transformer.resize_token_embeddings(num_tokens)
def _test_TFXLM(self, size, large=False):
    """Load the pretrained XLM model named by *size*, tokenize a sample
    sentence, and run the shared export/comparison harness on it."""
    from transformers import TFXLMModel, XLMTokenizer

    model = TFXLMModel.from_pretrained(size)
    tokenizer = XLMTokenizer.from_pretrained(size)

    input_dict = tokenizer("Hello, my dog is cute", return_tensors="tf")
    spec, input_dict = self.spec_and_pad(input_dict)

    self.run_test(
        model,
        input_dict,
        input_signature=spec,
        outputs=["last_hidden_state"],
        large=large,
        atol=0.005,
    )
def test_TFXLMModel(self):
    """Convert the pretrained xlm-mlm-enfr-1024 model to ONNX and verify
    onnxruntime reproduces the Keras predictions within tolerance."""
    from transformers import XLMTokenizer, TFXLMModel

    pretrained_weights = 'xlm-mlm-enfr-1024'
    tokenizer = XLMTokenizer.from_pretrained(pretrained_weights)
    text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)

    model = TFXLMModel.from_pretrained(pretrained_weights)
    predictions = model.predict(inputs)

    onnx_model = keras2onnx.convert_keras(model, model.name)
    self.assertTrue(run_onnx_runtime(
        onnx_model.graph.name, onnx_model, inputs_onnx, predictions,
        self.model_files, rtol=1.e-2, atol=1.e-4))
def test_model_from_pretrained(self):
    """Smoke-test that the first archived pretrained XLM checkpoint loads."""
    # Only the first archive entry is exercised to keep the test cheap.
    first_names = list(TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]
    for name in first_names:
        loaded = TFXLMModel.from_pretrained(name)
        self.assertIsNotNone(loaded)