Ejemplo n.º 1
0
 def test_TFXLMModel(self):
     """Convert a default-config TFXLMModel with keras2onnx and check it.

     The model is built from a default ``XLMConfig`` rather than loaded with
     ``from_pretrained``. The tokenizer is obtained through
     ``self._get_tokenzier`` from the file name
     ``xlm_xlm-mlm-enfr-1024.pickle``. The test passes when
     ``run_onnx_runtime`` returns a truthy value for the converted model and
     the Keras predictions (rtol=1e-2, atol=1e-4).
     """
     from transformers import XLMConfig, TFXLMModel
     keras.backend.clear_session()
     tokenizer = self._get_tokenzier('xlm_xlm-mlm-enfr-1024.pickle')
     # The first element returned by _prepare_inputs is not used here.
     _text, keras_inputs, onnx_inputs = self._prepare_inputs(tokenizer)
     model = TFXLMModel(XLMConfig())
     expected = model.predict(keras_inputs)
     onnx_model = keras2onnx.convert_keras(model, model.name)
     outputs_match = run_onnx_runtime(
         onnx_model.graph.name,
         onnx_model,
         onnx_inputs,
         expected,
         self.model_files,
         rtol=1.e-2,
         atol=1.e-4,
     )
     self.assertTrue(outputs_match)
        def create_and_check_xlm_model(
            self,
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            input_mask,
        ):
            """Build a TFXLMModel from ``config`` and check its output shape.

            The model is called twice: once with a dict of ``input_ids``,
            ``lengths`` and ``langs`` (result discarded), and once with the
            list ``[input_ids, input_mask]``. The first output of the second
            call must have shape
            ``[batch_size, seq_length, hidden_size]``.

            ``sequence_labels``, ``token_labels`` and ``is_impossible_labels``
            are accepted but not used by this check.
            """
            model = TFXLMModel(config=config)

            # Dict-style invocation: only exercised, its output is not checked.
            model({
                "input_ids": input_ids,
                "lengths": input_lengths,
                "langs": token_type_ids,
            })

            # List-style invocation: this is the output that gets verified.
            sequence_output = model([input_ids, input_mask])[0]
            actual_shape = list(sequence_output.numpy().shape)
            expected_shape = [
                self.batch_size,
                self.seq_length,
                self.hidden_size,
            ]
            self.parent.assertListEqual(actual_shape, expected_shape)
Ejemplo n.º 3
0
    def create_and_check_xlm_model(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        choice_labels,
        input_mask,
    ):
        """Build a TFXLMModel from ``config`` and check ``last_hidden_state``.

        The model is called with a dict of ``input_ids``, ``lengths`` and
        ``langs`` (result discarded), then with the list
        ``[input_ids, input_mask]``. The ``last_hidden_state`` of the second
        call must have shape ``(batch_size, seq_length, hidden_size)``.

        The label arguments are accepted but not used by this check.
        """
        model = TFXLMModel(config=config)

        # Dict-style invocation: only exercised, its output is not checked.
        model({
            "input_ids": input_ids,
            "lengths": input_lengths,
            "langs": token_type_ids,
        })

        # List-style invocation: this is the output that gets verified.
        actual_shape = model([input_ids, input_mask]).last_hidden_state.shape
        expected_shape = (self.batch_size, self.seq_length, self.hidden_size)
        self.parent.assertEqual(actual_shape, expected_shape)
Ejemplo n.º 4
0
    def __init__(
            self,
            pretrained_model_name_or_path='xlm-mlm-en-2048',
            reduce_output='sum',
            trainable=True,
            num_tokens=None,
            **kwargs
    ):
        """Set up the encoder around a pretrained ``TFXLMModel``.

        Args:
            pretrained_model_name_or_path: Passed to
                ``TFXLMModel.from_pretrained``.
            reduce_output: Stored on the instance and passed to
                ``SequenceReducer`` as ``reduce_mode``.
            trainable: Assigned to the loaded model's ``trainable`` attribute.
            num_tokens: Passed to the loaded model's
                ``resize_token_embeddings``.
            **kwargs: Accepted but not used here.

        If ``transformers`` cannot be imported, an error is logged and the
        process exits with status -1.
        """
        super(XLMEncoder, self).__init__()

        # Import lazily so the dependency is only needed when this encoder
        # is actually instantiated.
        try:
            from transformers import TFXLMModel
        except ModuleNotFoundError:
            logger.error(
                ' transformers is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)

        pretrained = TFXLMModel.from_pretrained(pretrained_model_name_or_path)
        self.transformer = pretrained
        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        pretrained.trainable = trainable
        pretrained.resize_token_embeddings(num_tokens)
Ejemplo n.º 5
0
 def _test_TFXLM(self, size, large=False):
     """Run the shared conversion test on the pretrained XLM model ``size``.

     Loads ``XLMTokenizer`` and ``TFXLMModel`` from ``size``, tokenizes a
     fixed sentence into TF tensors, passes it through ``self.spec_and_pad``
     and hands everything to ``self.run_test``, requesting the
     ``last_hidden_state`` output with ``atol=0.005``.
     """
     from transformers import TFXLMModel, XLMTokenizer
     tokenizer = XLMTokenizer.from_pretrained(size)
     model = TFXLMModel.from_pretrained(size)
     encoded = tokenizer("Hello, my dog is cute", return_tensors="tf")
     signature, padded = self.spec_and_pad(encoded)
     self.run_test(
         model,
         padded,
         input_signature=signature,
         outputs=["last_hidden_state"],
         large=large,
         atol=0.005,
     )
Ejemplo n.º 6
0
 def test_TFXLMModel(self):
     """Convert the pretrained ``xlm-mlm-enfr-1024`` model and check it.

     Tokenizer and model are both loaded with ``from_pretrained``. The test
     passes when ``run_onnx_runtime`` returns a truthy value for the
     converted model and the Keras predictions (rtol=1e-2, atol=1e-4).
     """
     from transformers import XLMTokenizer, TFXLMModel
     checkpoint = 'xlm-mlm-enfr-1024'
     tokenizer = XLMTokenizer.from_pretrained(checkpoint)
     # The first element returned by _prepare_inputs is not used here.
     _text, keras_inputs, onnx_inputs = self._prepare_inputs(tokenizer)
     model = TFXLMModel.from_pretrained(checkpoint)
     expected = model.predict(keras_inputs)
     onnx_model = keras2onnx.convert_keras(model, model.name)
     outputs_match = run_onnx_runtime(
         onnx_model.graph.name,
         onnx_model,
         onnx_inputs,
         expected,
         self.model_files,
         rtol=1.e-2,
         atol=1.e-4,
     )
     self.assertTrue(outputs_match)
 def test_model_from_pretrained(self):
     """Check that ``TFXLMModel.from_pretrained`` returns a non-None model.

     Only the first key of ``TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP`` is tried.
     """
     first_only = list(TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]
     for model_name in first_only:
         self.assertIsNotNone(TFXLMModel.from_pretrained(model_name))