def test_TFXLMWithLMHeadModel(self):
     """Convert a TFXLMWithLMHeadModel to ONNX and validate it.

     The model is instantiated from a default ``XLMConfig`` (it is not
     loaded with ``from_pretrained``); the tokenizer is obtained through
     the ``_get_tokenzier`` helper from a file named after
     'xlm-mlm-enfr-1024'.
     """
     from transformers import TFXLMWithLMHeadModel, XLMConfig

     keras.backend.clear_session()

     # NOTE(review): presumably a pickled tokenizer -- confirm in the helper.
     tokenizer = self._get_tokenzier('xlm_xlm-mlm-enfr-1024.pickle')
     _, keras_inputs, onnx_inputs = self._prepare_inputs(tokenizer)

     model = TFXLMWithLMHeadModel(XLMConfig())
     reference = model.predict(keras_inputs)
     converted = keras2onnx.convert_keras(model, model.name)

     # The Keras predictions are handed to run_onnx_runtime together with
     # the converted model; its result must be truthy.
     passed = run_onnx_runtime(
         converted.graph.name,
         converted,
         onnx_inputs,
         reference,
         self.model_files,
         rtol=1.e-2,
         atol=1.e-4,
     )
     self.assertTrue(passed)
 def test_lm_generate_xlm_mlm_en_2048(self):
     """Check non-sampling generation of "xlm-mlm-en-2048" against pinned ids.

     The two-token prompt [14, 447] ("the president") is expected to be
     continued into the same pair repeated, 20 ids in total.
     """
     model = TFXLMWithLMHeadModel.from_pretrained("xlm-mlm-en-2048")

     prompt = [14, 447]  # the president
     input_ids = tf.convert_to_tensor([prompt], dtype=tf.int32)

     # "the president" repeated ten times.
     expected_output_ids = [14, 447] * 10

     # TODO(PVP): this and other input_ids I tried for generation give pretty bad results. Not sure why. Model might just not be made for auto-regressive inference
     output_ids = model.generate(input_ids, do_sample=False)
     generated = output_ids[0].numpy().tolist()
     self.assertListEqual(generated, expected_output_ids)
        def create_and_check_xlm_lm_head(
            self,
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            input_mask,
        ):
            """Build a TFXLMWithLMHeadModel and check the shape of its logits.

            The model is called with ``input_ids``, ``input_lengths`` (as
            "lengths") and ``token_type_ids`` (as "langs"). The first output
            is treated as the logits and must have shape
            ``[batch_size, seq_length, vocab_size]``. The label arguments and
            ``input_mask`` are accepted but not used here.
            """
            lm_model = TFXLMWithLMHeadModel(config)
            outputs = lm_model({
                "input_ids": input_ids,
                "lengths": input_lengths,
                "langs": token_type_ids,
            })

            result = {"logits": outputs[0].numpy()}

            actual_shape = list(result["logits"].shape)
            expected_shape = [self.batch_size, self.seq_length, self.vocab_size]
            self.parent.assertListEqual(actual_shape, expected_shape)
# Example #4
# 0
    def create_and_check_xlm_lm_head(
        self,
        config,
        input_ids,
        token_type_ids,
        input_lengths,
        sequence_labels,
        token_labels,
        is_impossible_labels,
        choice_labels,
        input_mask,
    ):
        """Build a TFXLMWithLMHeadModel and check the shape of ``logits``.

        The model is called with ``input_ids``, ``input_lengths`` (as
        "lengths") and ``token_type_ids`` (as "langs"). The ``logits``
        attribute of the output must have shape
        ``(batch_size, seq_length, vocab_size)``. The label arguments and
        ``input_mask`` are accepted but not used here.
        """
        lm_model = TFXLMWithLMHeadModel(config)
        result = lm_model(
            {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
        )

        actual_shape = result.logits.shape
        expected_shape = (self.batch_size, self.seq_length, self.vocab_size)
        self.parent.assertEqual(actual_shape, expected_shape)
 def test_TFXLMWithLMHeadModel(self):
     """Convert the pretrained 'xlm-mlm-enfr-1024' LM-head model to ONNX and validate it.

     Both the tokenizer and the model are loaded with ``from_pretrained``
     using the same checkpoint name.
     """
     from transformers import TFXLMWithLMHeadModel, XLMTokenizer

     checkpoint = 'xlm-mlm-enfr-1024'
     tokenizer = XLMTokenizer.from_pretrained(checkpoint)
     _, keras_inputs, onnx_inputs = self._prepare_inputs(tokenizer)

     model = TFXLMWithLMHeadModel.from_pretrained(checkpoint)
     reference = model.predict(keras_inputs)
     converted = keras2onnx.convert_keras(model, model.name)

     # The Keras predictions are handed to run_onnx_runtime together with
     # the converted model; its result must be truthy.
     passed = run_onnx_runtime(
         converted.graph.name,
         converted,
         onnx_inputs,
         reference,
         self.model_files,
         rtol=1.e-2,
         atol=1.e-4,
     )
     self.assertTrue(passed)