Example #1
class TFGPT2ModelTest(TFModelTesterMixin, TFCoreModelTesterMixin,
                      unittest.TestCase):

    all_model_classes = ((TFGPT2Model, TFGPT2LMHeadModel,
                          TFGPT2ForSequenceClassification,
                          TFGPT2DoubleHeadsModel) if is_tf_available() else ())
    all_generative_model_classes = (
        TFGPT2LMHeadModel, ) if is_tf_available() else ()
    test_head_masking = False
    test_onnx = True
    onnx_min_opset = 10

    def setUp(self):
        self.model_tester = TFGPT2ModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=GPT2Config,
                                          n_embd=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_gpt2_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model(*config_and_inputs)

    def test_gpt2_model_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model_past(*config_and_inputs)

    def test_gpt2_model_att_mask_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model_attention_mask_past(
            *config_and_inputs)

    def test_gpt2_model_past_large_inputs(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model_past_large_inputs(
            *config_and_inputs)

    def test_gpt2_lm_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_lm_head(*config_and_inputs)

    def test_gpt2_double_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_double_head(*config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(),
                              tf.keras.layers.Layer)

            if model_class in self.all_generative_model_classes:
                x = model.get_output_embeddings()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_bias()
                assert name is None
            else:
                x = model.get_output_embeddings()
                assert x is None
                name = model.get_bias()
                assert name is None

    def test_gpt2_sequence_classification_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_for_sequence_classification(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFGPT2Model.from_pretrained(model_name)
            self.assertIsNotNone(model)

    # Overwritten from the common tests since ONNX Runtime optimization doesn't work
    # with tf.gather() when the argument `batch_dims` > 0
    @require_tf2onnx
    @slow
    def test_onnx_runtime_optimize(self):
        if not self.test_onnx:
            return

        import onnxruntime
        import tf2onnx

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:

            # Skip these 2 classes, which use `tf.gather` with `batch_dims=1`
            if model_class in [
                    TFGPT2ForSequenceClassification, TFGPT2DoubleHeadsModel
            ]:
                continue

            model = model_class(config)
            model(model.dummy_inputs)

            onnx_model_proto, _ = tf2onnx.convert.from_keras(
                model, opset=self.onnx_min_opset)

            onnxruntime.InferenceSession(onnx_model_proto.SerializeToString())
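
# A minimal standalone sketch of the tf2onnx/onnxruntime round-trip exercised in
# test_onnx_runtime_optimize above, using a toy Keras model instead of a
# Transformers one (layer sizes and tensor names are illustrative):
import numpy as np
import tensorflow as tf
import tf2onnx
import onnxruntime

toy = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(8,))])
spec = (tf.TensorSpec((None, 8), tf.float32, name="input"),)
onnx_proto, _ = tf2onnx.convert.from_keras(toy, input_signature=spec, opset=10)

# Run the converted graph with ONNX Runtime and compare against Keras.
session = onnxruntime.InferenceSession(onnx_proto.SerializeToString())
x = np.random.rand(2, 8).astype(np.float32)
(onnx_out,) = session.run(None, {session.get_inputs()[0].name: x})
np.testing.assert_allclose(onnx_out, toy(x).numpy(), rtol=1e-4)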
Example #2
class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        TFBertModel,
        TFBertForMaskedLM,
        TFBertLMHeadModel,
        TFBertForNextSentencePrediction,
        TFBertForPreTraining,
        TFBertForQuestionAnswering,
        TFBertForSequenceClassification,
        TFBertForTokenClassification,
        TFBertForMultipleChoice,
    ) if is_tf_available() else ())

    def setUp(self):
        self.model_tester = TFBertModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=BertConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_bert_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_masked_lm(
            *config_and_inputs)

    def test_for_causal_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_lm_head(*config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_multiple_choice(
            *config_and_inputs)

    def test_for_next_sequence_prediction(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_next_sequence_prediction(
            *config_and_inputs)

    def test_for_pretraining(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_pretraining(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_question_answering(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_sequence_classification(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_token_classification(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        # for model_name in TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
        for model_name in ["bert-base-uncased"]:
            model = TFBertModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
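
# All the test_* methods above share one delegation pattern: the TestCase owns a
# model tester that builds a fresh config plus random inputs, and each
# create_and_check_* method instantiates a single model class and asserts on its
# output shapes. A schematic sketch of that contract (hypothetical names):
class ToyModelTester:
    def __init__(self, parent):
        self.parent = parent  # the unittest.TestCase, used for assertions
        self.batch_size, self.seq_length = 13, 7

    def prepare_config_and_inputs(self):
        # Real testers return (config, input_ids, masks, labels, ...)
        return {"vocab_size": 99}, {"shape": (self.batch_size, self.seq_length)}

    def create_and_check_model(self, config, inputs):
        # Real checkers run the model and compare shapes against the tester's fields.
        self.parent.assertEqual(inputs["shape"], (self.batch_size, self.seq_length))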
Example #3
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import unittest

from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester
from .utils import require_tf, slow

from transformers import DistilBertConfig, is_tf_available

if is_tf_available():
    import tensorflow as tf
    from transformers.modeling_tf_distilbert import (TFDistilBertModel,
                                                     TFDistilBertForMaskedLM,
                                                     TFDistilBertForQuestionAnswering,
                                                     TFDistilBertForSequenceClassification)


@require_tf
class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):

    all_model_classes = (TFDistilBertModel, TFDistilBertForMaskedLM, TFDistilBertForQuestionAnswering,
                         TFDistilBertForSequenceClassification) if is_tf_available() else None
    test_pruning = True
    test_torchscript = True
    test_resize_embeddings = True
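
# ids_tensor, imported above, just draws random token ids; a minimal sketch of
# such a helper (hypothetical reimplementation, assuming TF is available):
import numpy as np
import tensorflow as tf

def random_ids_tensor(shape, vocab_size, seed=None):
    """Random int32 tensor with values in [0, vocab_size), e.g. fake input_ids."""
    rng = np.random.default_rng(seed)
    return tf.constant(rng.integers(0, vocab_size, size=shape), dtype=tf.int32)

# Example: a fake batch of token ids of shape [13, 7]
fake_input_ids = random_ids_tensor([13, 7], vocab_size=99)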
Example #4
class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        TFBertModel,
        TFBertForMaskedLM,
        TFBertForNextSentencePrediction,
        TFBertForPreTraining,
        TFBertForQuestionAnswering,
        TFBertForSequenceClassification,
        TFBertForTokenClassification,
    ) if is_tf_available() else ())

    class TFBertModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length],
                                   self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length],
                                        vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                            self.type_vocab_size)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size],
                                             self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length],
                                          self.num_labels)
                choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = BertConfig(
                vocab_size=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range,
            )

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

        def create_and_check_bert_model(self, config, input_ids,
                                        token_type_ids, input_mask,
                                        sequence_labels, token_labels,
                                        choice_labels):
            model = TFBertModel(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            sequence_output, pooled_output = model(inputs)

            inputs = [input_ids, input_mask]
            sequence_output, pooled_output = model(inputs)

            sequence_output, pooled_output = model(input_ids)

            result = {
                "sequence_output": sequence_output.numpy(),
                "pooled_output": pooled_output.numpy(),
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].shape),
                [self.batch_size, self.seq_length, self.hidden_size])
            self.parent.assertListEqual(list(result["pooled_output"].shape),
                                        [self.batch_size, self.hidden_size])

        def create_and_check_bert_for_masked_lm(self, config, input_ids,
                                                token_type_ids, input_mask,
                                                sequence_labels, token_labels,
                                                choice_labels):
            model = TFBertForMaskedLM(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            (prediction_scores, ) = model(inputs)
            result = {
                "prediction_scores": prediction_scores.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape),
                [self.batch_size, self.seq_length, self.vocab_size])

        def create_and_check_bert_for_next_sequence_prediction(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            model = TFBertForNextSentencePrediction(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            (seq_relationship_score, ) = model(inputs)
            result = {
                "seq_relationship_score": seq_relationship_score.numpy(),
            }
            self.parent.assertListEqual(
                list(result["seq_relationship_score"].shape),
                [self.batch_size, 2])

        def create_and_check_bert_for_pretraining(self, config, input_ids,
                                                  token_type_ids, input_mask,
                                                  sequence_labels,
                                                  token_labels, choice_labels):
            model = TFBertForPreTraining(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            prediction_scores, seq_relationship_score = model(inputs)
            result = {
                "prediction_scores": prediction_scores.numpy(),
                "seq_relationship_score": seq_relationship_score.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape),
                [self.batch_size, self.seq_length, self.vocab_size])
            self.parent.assertListEqual(
                list(result["seq_relationship_score"].shape),
                [self.batch_size, 2])

        def create_and_check_bert_for_sequence_classification(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            config.num_labels = self.num_labels
            model = TFBertForSequenceClassification(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            (logits, ) = model(inputs)
            result = {
                "logits": logits.numpy(),
            }
            self.parent.assertListEqual(list(result["logits"].shape),
                                        [self.batch_size, self.num_labels])

        def create_and_check_bert_for_multiple_choice(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            config.num_choices = self.num_choices
            model = TFBertForMultipleChoice(config=config)
            multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1),
                                                 (1, self.num_choices, 1))
            multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1),
                                                 (1, self.num_choices, 1))
            multiple_choice_token_type_ids = tf.tile(
                tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
            inputs = {
                "input_ids": multiple_choice_inputs_ids,
                "attention_mask": multiple_choice_input_mask,
                "token_type_ids": multiple_choice_token_type_ids,
            }
            (logits, ) = model(inputs)
            result = {
                "logits": logits.numpy(),
            }
            self.parent.assertListEqual(list(result["logits"].shape),
                                        [self.batch_size, self.num_choices])

        def create_and_check_bert_for_token_classification(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            config.num_labels = self.num_labels
            model = TFBertForTokenClassification(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            (logits, ) = model(inputs)
            result = {
                "logits": logits.numpy(),
            }
            self.parent.assertListEqual(
                list(result["logits"].shape),
                [self.batch_size, self.seq_length, self.num_labels])

        def create_and_check_bert_for_question_answering(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            model = TFBertForQuestionAnswering(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            start_logits, end_logits = model(inputs)
            result = {
                "start_logits": start_logits.numpy(),
                "end_logits": end_logits.numpy(),
            }
            self.parent.assertListEqual(list(result["start_logits"].shape),
                                        [self.batch_size, self.seq_length])
            self.parent.assertListEqual(list(result["end_logits"].shape),
                                        [self.batch_size, self.seq_length])

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs
            inputs_dict = {
                "input_ids": input_ids,
                "token_type_ids": token_type_ids,
                "attention_mask": input_mask
            }
            return config, inputs_dict

    def setUp(self):
        self.model_tester = TFBertModelTest.TFBertModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=BertConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_bert_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_masked_lm(
            *config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_multiple_choice(
            *config_and_inputs)

    def test_for_next_sequence_prediction(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_next_sequence_prediction(
            *config_and_inputs)

    def test_for_pretraining(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_pretraining(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_question_answering(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_sequence_classification(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_token_classification(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        for model_name in ["bert-base-uncased"]:
            model = TFBertModel.from_pretrained(model_name,
                                                cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)
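
# create_and_check_bert_for_multiple_choice above relies on tf.expand_dims +
# tf.tile to repeat every [batch, seq] tensor once per choice; a self-contained
# sketch of just that reshaping step:
import tensorflow as tf

ids = tf.constant([[1, 2, 3], [4, 5, 6]])              # [batch=2, seq=3]
num_choices = 4
tiled = tf.tile(tf.expand_dims(ids, 1), (1, num_choices, 1))
assert tiled.shape == (2, 4, 3)                        # each choice sees the same ids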
Example #5
class TFBlenderbotModelTest(TFModelTesterMixin, unittest.TestCase):
    all_model_classes = (TFBlenderbotForConditionalGeneration,
                         TFBlenderbotModel) if is_tf_available() else ()
    all_generative_model_classes = (
        TFBlenderbotForConditionalGeneration, ) if is_tf_available() else ()
    is_encoder_decoder = True
    test_pruning = False
    test_onnx = False

    def setUp(self):
        self.model_tester = TFBlenderbotModelTester(self)
        self.config_tester = ConfigTester(self, config_class=BlenderbotConfig)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_decoder_model_past_large_inputs(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common(
        )
        self.model_tester.check_decoder_model_past_large_inputs(
            *config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(),
                              tf.keras.layers.Layer)

            if model_class in self.all_generative_model_classes:
                x = model.get_output_embeddings()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_bias()
                assert isinstance(name, dict)
                for k, v in name.items():
                    assert isinstance(v, tf.Variable)
            else:
                x = model.get_output_embeddings()
                assert x is None
                name = model.get_bias()
                assert name is None

    def test_saved_model_creation(self):
        # This test is too long (>30s) and makes the CI fail
        pass

    def test_resize_token_embeddings(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        def _get_word_embedding_weight(model, embedding_layer):
            if hasattr(embedding_layer, "weight"):
                return embedding_layer.weight
            else:
                # Build the word embedding weights if they don't exist yet,
                # then retry fetching the attribute once the model is built.
                model(model.dummy_inputs)
                if hasattr(embedding_layer, "weight"):
                    return embedding_layer.weight
                else:
                    return None

        for model_class in self.all_model_classes:
            for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
                # build the embeddings
                model = model_class(config=config)
                old_input_embeddings = _get_word_embedding_weight(
                    model, model.get_input_embeddings())
                old_output_embeddings = _get_word_embedding_weight(
                    model, model.get_output_embeddings())
                old_final_logits_bias = model.get_bias()

                # reshape the embeddings
                model.resize_token_embeddings(size)
                new_input_embeddings = _get_word_embedding_weight(
                    model, model.get_input_embeddings())
                new_output_embeddings = _get_word_embedding_weight(
                    model, model.get_output_embeddings())
                new_final_logits_bias = model.get_bias()

                # check that the resized embeddings size matches the desired size.
                assert_size = size if size is not None else config.vocab_size

                self.assertEqual(new_input_embeddings.shape[0], assert_size)

                # check that weights remain the same after resizing
                models_equal = True
                for p1, p2 in zip(old_input_embeddings.value(),
                                  new_input_embeddings.value()):
                    if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
                        models_equal = False
                self.assertTrue(models_equal)

                if old_output_embeddings is not None and new_output_embeddings is not None:
                    self.assertEqual(new_output_embeddings.shape[0],
                                     assert_size)

                    models_equal = True
                    for p1, p2 in zip(old_output_embeddings.value(),
                                      new_output_embeddings.value()):
                        if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
                            models_equal = False
                    self.assertTrue(models_equal)

                if old_final_logits_bias is not None and new_final_logits_bias is not None:
                    old_final_logits_bias = old_final_logits_bias[
                        "final_logits_bias"]
                    new_final_logits_bias = new_final_logits_bias[
                        "final_logits_bias"]
                    self.assertEqual(new_final_logits_bias.shape[0], 1)
                    self.assertEqual(new_final_logits_bias.shape[1],
                                     assert_size)

                    models_equal = True
                    for old, new in zip(old_final_logits_bias.value(),
                                        new_final_logits_bias.value()):
                        for p1, p2 in zip(old, new):
                            if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
                                models_equal = False
                    self.assertTrue(models_equal)
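
# resize_token_embeddings, exercised by the test above, is typically called after
# growing the tokenizer's vocabulary; a hedged usage sketch (the checkpoint name
# is only an example, and may need from_pt=True if no TF weights are published):
from transformers import BlenderbotTokenizer, TFBlenderbotForConditionalGeneration

tokenizer = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
model = TFBlenderbotForConditionalGeneration.from_pretrained("facebook/blenderbot-400M-distill")

tokenizer.add_tokens(["<new_token>"])
model.resize_token_embeddings(len(tokenizer))  # grows input/output embeddings and the logits bias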
Example #6
def _model_is_tf(model_cls):
    return is_tf_available() and issubclass(model_cls, tf.keras.layers.Layer)
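
# Usage: with the TF classes from the earlier examples imported,
# _model_is_tf(TFBertModel) is True, while _model_is_tf(dict) is False.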
Example #7
class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        TFAlbertModel,
        TFAlbertForPreTraining,
        TFAlbertForMaskedLM,
        TFAlbertForSequenceClassification,
        TFAlbertForQuestionAnswering,
        TFAlbertForTokenClassification,
        TFAlbertForMultipleChoice,
    ) if is_tf_available() else ())
    test_head_masking = False

    def setUp(self):
        self.model_tester = TFAlbertModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=AlbertConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_albert_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_albert_model(*config_and_inputs)

    def test_for_pretraining(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_albert_for_pretraining(
            *config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_albert_for_masked_lm(
            *config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_albert_for_multiple_choice(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_albert_for_sequence_classification(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_albert_for_question_answering(
            *config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        list_lm_models = [TFAlbertForPreTraining, TFAlbertForMaskedLM]

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(),
                              tf.keras.layers.Layer)

            if model_class in list_lm_models:
                x = model.get_output_embeddings()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_bias()
                assert isinstance(name, dict)
                for k, v in name.items():
                    assert isinstance(v, tf.Variable)
            else:
                x = model.get_output_embeddings()
                assert x is None
                name = model.get_bias()
                assert name is None

    def test_mixed_precision(self):
        # TODO JP: Make ALBERT float16 compliant
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFAlbertModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
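
# The skipped mixed-precision test above would run ALBERT under a float16 Keras
# policy; a minimal sketch of toggling that policy (API path as in recent TF 2.x
# releases; older releases expose it under tf.keras.mixed_precision.experimental):
import tensorflow as tf

tf.keras.mixed_precision.set_global_policy("mixed_float16")
# ... build and call the model here: variables stay float32, compute runs in float16 ...
tf.keras.mixed_precision.set_global_policy("float32")  # restore the default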
Example #8
from __future__ import absolute_import, division, print_function, unicode_literals

import csv
import json
import os
from abc import ABC, abstractmethod
from contextlib import contextmanager
from itertools import groupby
from typing import Union, Optional, Tuple, List, Dict

import numpy as np

from transformers import AutoTokenizer, PreTrainedTokenizer, PretrainedConfig, \
    SquadExample, squad_convert_examples_to_features, is_tf_available, is_torch_available, logger

if is_tf_available():
    from transformers import TFAutoModel, TFAutoModelForSequenceClassification, \
        TFAutoModelForQuestionAnswering, TFAutoModelForTokenClassification

if is_torch_available():
    import torch
    from transformers import AutoModel, AutoModelForSequenceClassification, \
        AutoModelForQuestionAnswering, AutoModelForTokenClassification


class ArgumentHandler(ABC):
    """
    Base interface for handling varargs for each Pipeline
    """
    @abstractmethod
    def __call__(self, *args, **kwargs):
        raise NotImplementedError()
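
# A hypothetical concrete handler, to illustrate the contract ArgumentHandler
# defines: normalize whatever the caller passed into a flat list of inputs.
class SimpleArgumentHandler(ArgumentHandler):
    def __call__(self, *args, **kwargs):
        if "X" in kwargs:             # keyword form: handler(X=[...])
            inputs = kwargs["X"]
        else:                         # positional form: handler(a, b, c)
            inputs = list(args)
        return inputs if isinstance(inputs, list) else [inputs]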
Example #9
    def __call__(self, *texts, **kwargs):
        """
        Args:
            We support multiple use-cases; the following are mutually exclusive:
            X: sequence of SquadExample
            data: sequence of SquadExample
            question: (str, List[str]), batch of question(s) to map along with the context
            context: (str, List[str]), batch of context(s) associated with the provided question keyword argument
        Returns:
            dict: {'answer': str, 'score': float, 'start': int, 'end': int}
            answer: the textual answer in the initial context
            score: the score the model assigned to the current answer
            start: the character index in the original string corresponding to the beginning of the answer's span
            end: the character index in the original string corresponding to the end of the answer's span
        """
        # Set default values
        kwargs.setdefault('topk', 1)
        kwargs.setdefault('doc_stride', 128)
        kwargs.setdefault('max_answer_len', 15)
        kwargs.setdefault('max_seq_len', 384)
        kwargs.setdefault('max_question_len', 64)

        if kwargs['topk'] < 1:
            raise ValueError('topk parameter should be >= 1 (got {})'.format(
                kwargs['topk']))

        if kwargs['max_answer_len'] < 1:
            raise ValueError(
                'max_answer_len parameter should be >= 1 (got {})'.format(
                    kwargs['max_answer_len']))

        # Convert inputs to features
        examples = self._args_parser(*texts, **kwargs)
        features = squad_convert_examples_to_features(
            examples, self.tokenizer, kwargs['max_seq_len'],
            kwargs['doc_stride'], kwargs['max_question_len'], False)
        fw_args = self.inputs_for_model(features)

        # Manage tensor allocation on correct device
        with self.device_placement():
            if is_tf_available():
                import tensorflow as tf
                fw_args = {k: tf.constant(v) for (k, v) in fw_args.items()}
                start, end = self.model(fw_args)
                start, end = start.numpy(), end.numpy()
            else:
                import torch
                with torch.no_grad():
                    # Retrieve the score for the context tokens only (removing question tokens)
                    fw_args = {
                        k: torch.tensor(v)
                        for (k, v) in fw_args.items()
                    }
                    start, end = self.model(**fw_args)
                    start, end = start.cpu().numpy(), end.cpu().numpy()

        answers = []
        for (example, feature, start_, end_) in zip(examples, features, start,
                                                    end):
            # Normalize logits and spans to retrieve the answer
            start_ = np.exp(start_) / np.sum(np.exp(start_))
            end_ = np.exp(end_) / np.sum(np.exp(end_))

            # Mask padding and question
            start_, end_ = start_ * np.abs(np.array(feature.p_mask) -
                                           1), end_ * np.abs(
                                               np.array(feature.p_mask) - 1)

            # TODO: what happens if no answer is possible?
            # Mask CLS
            start_[0] = end_[0] = 0

            starts, ends, scores = self.decode(start_, end_, kwargs['topk'],
                                               kwargs['max_answer_len'])
            char_to_word = np.array(example.char_to_word_offset)

            # Convert the answer (tokens) back to the original text
            answers += [{
                'score':
                score.item(),
                'start':
                np.where(
                    char_to_word == feature.token_to_orig_map[s])[0][0].item(),
                'end':
                np.where(char_to_word == feature.token_to_orig_map[e])[0]
                [-1].item(),
                'answer':
                ' '.join(
                    example.doc_tokens[feature.token_to_orig_map[s]:feature.
                                       token_to_orig_map[e] + 1])
            } for s, e, score in zip(starts, ends, scores)]

        if len(answers) == 1:
            return answers[0]
        return answers
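
# self.decode above turns the normalized start/end probabilities into spans; a
# minimal numpy sketch of the underlying idea (outer product of probabilities,
# keep only spans with start <= end < start + max_answer_len, take the argmax):
import numpy as np

def decode_best_span(start_probs, end_probs, max_answer_len=15):
    scores = np.outer(start_probs, end_probs)     # scores[s, e] = p_start(s) * p_end(e)
    scores = np.triu(scores)                      # enforce s <= e
    scores = np.tril(scores, max_answer_len - 1)  # enforce e - s < max_answer_len
    s, e = np.unravel_index(scores.argmax(), scores.shape)
    return s, e, scores[s, e]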
Example #10
class TFConvBertModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        TFConvBertModel,
        TFConvBertForMaskedLM,
        TFConvBertForQuestionAnswering,
        TFConvBertForSequenceClassification,
        TFConvBertForTokenClassification,
        TFConvBertForMultipleChoice,
    ) if is_tf_available() else ())
    test_pruning = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = TFConvBertModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=ConvBertConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_multiple_choice(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_sequence_classification(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_token_classification(
            *config_and_inputs)

    @slow
    def test_saved_model_with_attentions_output(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.output_attentions = True
        config.output_hidden_states = False

        if hasattr(config, "use_cache"):
            config.use_cache = False

        encoder_seq_length = getattr(self.model_tester, "encoder_seq_length",
                                     self.model_tester.seq_length)
        encoder_key_length = getattr(self.model_tester, "key_length",
                                     encoder_seq_length)

        for model_class in self.all_model_classes:
            class_inputs_dict = self._prepare_for_class(
                inputs_dict, model_class)
            model = model_class(config)
            num_out = len(model(class_inputs_dict))

            with tempfile.TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname, saved_model=True)
                model = tf.keras.models.load_model(
                    os.path.join(tmpdirname, "saved_model", "1"))
                outputs = model(class_inputs_dict)
                output = outputs["attentions"]

                self.assertEqual(len(outputs), num_out)
                self.assertEqual(len(output),
                                 self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(output[0].shape[-3:]),
                    [
                        self.model_tester.num_attention_heads / 2,
                        encoder_seq_length, encoder_key_length
                    ],
                )

    def test_xla_mode(self):
        # TODO JP: Make ConvBert XLA compliant
        pass

    @slow
    def test_model_from_pretrained(self):
        model = TFConvBertModel.from_pretrained("YituTech/conv-bert-base")
        self.assertIsNotNone(model)

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.return_dict = True
        decoder_seq_length = getattr(self.model_tester, "decoder_seq_length",
                                     self.model_tester.seq_length)
        encoder_seq_length = getattr(self.model_tester, "encoder_seq_length",
                                     self.model_tester.seq_length)
        decoder_key_length = getattr(self.model_tester, "key_length",
                                     decoder_seq_length)
        encoder_key_length = getattr(self.model_tester, "key_length",
                                     encoder_seq_length)

        def check_decoder_attentions_output(outputs):
            out_len = len(outputs)
            self.assertEqual(out_len % 2, 0)
            decoder_attentions = outputs.decoder_attentions
            self.assertEqual(len(decoder_attentions),
                             self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(decoder_attentions[0].shape[-3:]),
                [
                    self.model_tester.num_attention_heads / 2,
                    decoder_seq_length, decoder_key_length
                ],
            )

        def check_encoder_attentions_output(outputs):
            attentions = [
                t.numpy() for t in (outputs.encoder_attentions if config.
                                    is_encoder_decoder else outputs.attentions)
            ]
            self.assertEqual(len(attentions),
                             self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [
                    self.model_tester.num_attention_heads / 2,
                    encoder_seq_length, encoder_key_length
                ],
            )

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["use_cache"] = False
            config.output_hidden_states = False
            model = model_class(config)
            outputs = model(self._prepare_for_class(inputs_dict, model_class))
            out_len = len(outputs)
            self.assertEqual(config.output_hidden_states, False)
            check_encoder_attentions_output(outputs)

            if self.is_encoder_decoder:
                model = model_class(config)
                outputs = model(
                    self._prepare_for_class(inputs_dict, model_class))
                self.assertEqual(config.output_hidden_states, False)
                check_decoder_attentions_output(outputs)

            # Check that output attentions can also be changed via the config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            outputs = model(self._prepare_for_class(inputs_dict, model_class))
            self.assertEqual(config.output_hidden_states, False)
            check_encoder_attentions_output(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            config.output_hidden_states = True
            model = model_class(config)
            outputs = model(self._prepare_for_class(inputs_dict, model_class))

            self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1),
                             len(outputs))
            self.assertEqual(model.config.output_hidden_states, True)
            check_encoder_attentions_output(outputs)
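
# save_pretrained(..., saved_model=True), used in the attention test above,
# writes a plain TF SavedModel next to the h5 weights; a hedged sketch of the
# round trip (the small config values below are illustrative):
import os
import tempfile
import tensorflow as tf
from transformers import ConvBertConfig, TFConvBertModel

config = ConvBertConfig(hidden_size=32, num_hidden_layers=2,
                        num_attention_heads=4, intermediate_size=37)
model = TFConvBertModel(config)
model(model.dummy_inputs)  # build the weights before saving

with tempfile.TemporaryDirectory() as tmpdirname:
    model.save_pretrained(tmpdirname, saved_model=True)
    reloaded = tf.keras.models.load_model(os.path.join(tmpdirname, "saved_model", "1"))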
Example #11
class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = ((TFTransfoXLModel, TFTransfoXLLMHeadModel,
                          TFTransfoXLForSequenceClassification)
                         if is_tf_available() else ())
    all_generative_model_classes = () if is_tf_available() else ()
    # TODO: add this test when TFTransfoXLLMHead has a linear output layer implemented
    test_resize_embeddings = False
    test_head_masking = False
    test_onnx = False
    test_mismatched_shapes = False

    def setUp(self):
        self.model_tester = TFTransfoXLModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=TransfoXLConfig,
                                          d_embed=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_transfo_xl_model(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_transfo_xl_model(*config_and_inputs)

    def test_transfo_xl_lm_head(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_transfo_xl_lm_head(
            *config_and_inputs)

    def test_transfo_xl_sequence_classification_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_transfo_xl_for_sequence_classification(
            *config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        list_other_models_with_output_ebd = [
            TFTransfoXLForSequenceClassification
        ]

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(),
                              tf.keras.layers.Layer)
            if model_class in list_other_models_with_output_ebd:
                x = model.get_output_embeddings()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_bias()
                assert name is None
            else:
                x = model.get_output_embeddings()
                assert x is None
                name = model.get_bias()
                assert name is None

    def test_xla_mode(self):
        # TODO JP: Make TransfoXL XLA compliant
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFTransfoXLModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
Example #12
class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):

    all_model_classes = (TFCTRLModel,
                         TFCTRLLMHeadModel) if is_tf_available() else ()

    class TFCTRLModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_token_type_ids=True,
            use_input_mask=True,
            use_labels=True,
            use_mc_token_ids=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_token_type_ids = use_token_type_ids
            self.use_input_mask = use_input_mask
            self.use_labels = use_labels
            self.use_mc_token_ids = use_mc_token_ids
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length],
                                   self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length],
                                        vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                            self.type_vocab_size)

            mc_token_ids = None
            if self.use_mc_token_ids:
                mc_token_ids = ids_tensor([self.batch_size, self.num_choices],
                                          self.seq_length)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size],
                                             self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length],
                                          self.num_labels)
                choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = CTRLConfig(
                vocab_size_or_config_json_file=self.vocab_size,
                n_embd=self.hidden_size,
                n_layer=self.num_hidden_layers,
                n_head=self.num_attention_heads,
                # intermediate_size=self.intermediate_size,
                # hidden_act=self.hidden_act,
                # hidden_dropout_prob=self.hidden_dropout_prob,
                # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                n_positions=self.max_position_embeddings,
                n_ctx=self.max_position_embeddings
                # type_vocab_size=self.type_vocab_size,
                # initializer_range=self.initializer_range
            )

            head_mask = ids_tensor(
                [self.num_hidden_layers, self.num_attention_heads], 2)

            return config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels

        def create_and_check_ctrl_model(self, config, input_ids, input_mask,
                                        head_mask, token_type_ids, *args):
            model = TFCTRLModel(config=config)
            inputs = {
                'input_ids': input_ids,
                'attention_mask': input_mask,
                'token_type_ids': token_type_ids
            }
            sequence_output = model(inputs)[0]

            inputs = [input_ids, None,
                      input_mask]  # None is the data for 'past'
            sequence_output = model(inputs)[0]

            sequence_output = model(input_ids)[0]

            result = {
                "sequence_output": sequence_output.numpy(),
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].shape),
                [self.batch_size, self.seq_length, self.hidden_size])

        def create_and_check_ctrl_lm_head(self, config, input_ids, input_mask,
                                          head_mask, token_type_ids, *args):
            model = TFCTRLLMHeadModel(config=config)
            inputs = {
                'input_ids': input_ids,
                'attention_mask': input_mask,
                'token_type_ids': token_type_ids
            }
            prediction_scores = model(inputs)[0]
            result = {
                "prediction_scores": prediction_scores.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape),
                [self.batch_size, self.seq_length, self.vocab_size])

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()

            (config, input_ids, input_mask, head_mask, token_type_ids,
             mc_token_ids, sequence_labels, token_labels,
             choice_labels) = config_and_inputs

            inputs_dict = {
                'input_ids': input_ids,
                'token_type_ids': token_type_ids,
                'attention_mask': input_mask
            }
            return config, inputs_dict

    def setUp(self):
        self.model_tester = TFCTRLModelTest.TFCTRLModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=CTRLConfig,
                                          n_embd=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_ctrl_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_ctrl_model(*config_and_inputs)

    def test_ctrl_lm_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_ctrl_lm_head(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        cache_dir = "/tmp/transformers_test/"
        for model_name in list(
                TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = TFCTRLModel.from_pretrained(model_name,
                                                cache_dir=cache_dir)
            shutil.rmtree(cache_dir)
            self.assertIsNotNone(model)
Example #13
def custom_encode_plus(sentence, tokenizer, return_tensors=None):
    # {'input_ids': [2, 10841, 10966, 10832, 10541, 21509, 27660, 18, 3], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0]}
    words = sentence.split()

    tokens = []
    tokens_mask = []

    for word in words:
        word_tokens = tokenizer.tokenize(word)
        if not word_tokens:
            # Fall back to the unknown token for words the tokenizer can't encode
            word_tokens = [tokenizer.unk_token]
        tokens.extend(word_tokens)
        tokens_mask.extend([1] + [0] * (len(word_tokens) - 1))

    ids = tokenizer.convert_tokens_to_ids(tokens)
    len_ids = len(ids)
    total_len = len_ids + tokenizer.num_special_tokens_to_add()
    if tokenizer.max_len and total_len > tokenizer.max_len:
        ids, _, _ = tokenizer.truncate_sequences(
            ids,
            pair_ids=None,
            num_tokens_to_remove=total_len - tokenizer.max_len,
            truncation_strategy="longest_first",
            stride=0,
        )

    sequence = tokenizer.build_inputs_with_special_tokens(ids)
    token_type_ids = tokenizer.create_token_type_ids_from_sequences(ids)
    # HARD-CODED: as far as I know, most transformer architectures use `[CLS] + text + [SEP]`.
    #             The only way to safely cover all cases is to integrate a `token mask builder` into the library itself.
    tokens_mask = [1] + tokens_mask + [1]
    words = [tokenizer.cls_token] + words + [tokenizer.sep_token]

    encoded_inputs = {}
    encoded_inputs["input_ids"] = sequence
    encoded_inputs["token_type_ids"] = token_type_ids

    if return_tensors == "tf" and is_tf_available():
        encoded_inputs["input_ids"] = tf.constant(
            [encoded_inputs["input_ids"]])

        if "token_type_ids" in encoded_inputs:
            encoded_inputs["token_type_ids"] = tf.constant(
                [encoded_inputs["token_type_ids"]])

        if "attention_mask" in encoded_inputs:
            encoded_inputs["attention_mask"] = tf.constant(
                [encoded_inputs["attention_mask"]])

    elif return_tensors == "pt" and is_torch_available():
        encoded_inputs["input_ids"] = torch.tensor(
            [encoded_inputs["input_ids"]])

        if "token_type_ids" in encoded_inputs:
            encoded_inputs["token_type_ids"] = torch.tensor(
                [encoded_inputs["token_type_ids"]])

        if "attention_mask" in encoded_inputs:
            encoded_inputs["attention_mask"] = torch.tensor(
                [encoded_inputs["attention_mask"]])

    elif return_tensors is not None:
        logger.warning(
            "Unable to convert output to tensor format {}; PyTorch or TensorFlow is not available."
            .format(return_tensors))

    return encoded_inputs, words, tokens_mask
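
A hypothetical usage of custom_encode_plus (the checkpoint name is an assumption for illustration; note the function relies on the older tokenizer.max_len attribute, so it targets older transformers releases):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
encoded, words, tokens_mask = custom_encode_plus(
    "Hello brave new world", tokenizer, return_tensors="pt")
print(encoded["input_ids"].shape)  # (1, sequence_length)
print(words)        # [CLS] + the whitespace-split words + [SEP]
print(tokens_mask)  # 1 at the first sub-token of each word, 0 on continuations
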
class TFLongformerModelTest(TFModelTesterMixin, unittest.TestCase):
    test_pruning = False  # pruning is not supported
    test_headmasking = False  # head masking is not supported
    test_torchscript = False

    all_model_classes = (
        (TFLongformerModel, TFLongformerForMaskedLM, TFLongformerForQuestionAnswering,) if is_tf_available() else ()
    )

    def setUp(self):
        self.model_tester = TFLongformerModelTester(self)
        self.config_tester = ConfigTester(self, config_class=LongformerConfig, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_longformer_model_attention_mask_determinism(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_attention_mask_determinism(*config_and_inputs)

    def test_longformer_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_longformer_model(*config_and_inputs)

    def test_longformer_model_global_attention_mask(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_longformer_model_with_global_attention_mask(*config_and_inputs)

    def test_longformer_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_longformer_for_masked_lm(*config_and_inputs)

    def test_longformer_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_question_answering()
        self.model_tester.create_and_check_longformer_for_question_answering(*config_and_inputs)
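
For context on the global-attention tests above, a minimal sketch of what a global_attention_mask looks like: a 0/1 tensor with the same shape as input_ids, where 1 marks positions that attend globally. Giving only the first token global attention is a common choice, not something these tests mandate:

import tensorflow as tf

input_ids = tf.ones((2, 8), dtype=tf.int32)  # (batch_size, seq_len)
global_attention_mask = tf.concat(
    [tf.ones((2, 1), dtype=tf.int32),   # first token attends globally
     tf.zeros((2, 7), dtype=tf.int32)], axis=-1)
print(global_attention_mask.shape)  # (2, 8)
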
Example #15
class TFBenchmarkTest(unittest.TestCase):
    def check_results_dict_not_empty(self, results):
        for model_result in results.values():
            for batch_size, sequence_length in zip(model_result["bs"],
                                                   model_result["ss"]):
                result = model_result["result"][batch_size][sequence_length]
                self.assertIsNotNone(result)

    def test_inference_no_configs_eager(self):
        MODEL_ID = "sshleifer/tiny-gpt2"
        benchmark_args = TensorFlowBenchmarkArguments(
            models=[MODEL_ID],
            training=False,
            inference=True,
            sequence_lengths=[8],
            batch_sizes=[1],
            eager_mode=True,
            multi_process=False,
        )
        benchmark = TensorFlowBenchmark(benchmark_args)
        results = benchmark.run()
        self.check_results_dict_not_empty(results.time_inference_result)
        self.check_results_dict_not_empty(results.memory_inference_result)

    def test_inference_no_configs_only_pretrain(self):
        MODEL_ID = "sgugger/tiny-distilbert-classification"
        benchmark_args = TensorFlowBenchmarkArguments(
            models=[MODEL_ID],
            training=False,
            inference=True,
            sequence_lengths=[8],
            batch_sizes=[1],
            multi_process=False,
            only_pretrain_model=True,
        )
        benchmark = TensorFlowBenchmark(benchmark_args)
        results = benchmark.run()
        self.check_results_dict_not_empty(results.time_inference_result)
        self.check_results_dict_not_empty(results.memory_inference_result)

    def test_inference_no_configs_graph(self):
        MODEL_ID = "sshleifer/tiny-gpt2"
        benchmark_args = TensorFlowBenchmarkArguments(
            models=[MODEL_ID],
            training=False,
            inference=True,
            sequence_lengths=[8],
            batch_sizes=[1],
            multi_process=False,
        )
        benchmark = TensorFlowBenchmark(benchmark_args)
        results = benchmark.run()
        self.check_results_dict_not_empty(results.time_inference_result)
        self.check_results_dict_not_empty(results.memory_inference_result)

    def test_inference_with_configs_eager(self):
        MODEL_ID = "sshleifer/tiny-gpt2"
        config = AutoConfig.from_pretrained(MODEL_ID)
        benchmark_args = TensorFlowBenchmarkArguments(
            models=[MODEL_ID],
            training=False,
            inference=True,
            sequence_lengths=[8],
            batch_sizes=[1],
            eager_mode=True,
            multi_process=False,
        )
        benchmark = TensorFlowBenchmark(benchmark_args, [config])
        results = benchmark.run()
        self.check_results_dict_not_empty(results.time_inference_result)
        self.check_results_dict_not_empty(results.memory_inference_result)

    def test_inference_with_configs_graph(self):
        MODEL_ID = "sshleifer/tiny-gpt2"
        config = AutoConfig.from_pretrained(MODEL_ID)
        benchmark_args = TensorFlowBenchmarkArguments(
            models=[MODEL_ID],
            training=False,
            inference=True,
            sequence_lengths=[8],
            batch_sizes=[1],
            multi_process=False,
        )
        benchmark = TensorFlowBenchmark(benchmark_args, [config])
        results = benchmark.run()
        self.check_results_dict_not_empty(results.time_inference_result)
        self.check_results_dict_not_empty(results.memory_inference_result)

    def test_train_no_configs(self):
        MODEL_ID = "sshleifer/tiny-gpt2"
        benchmark_args = TensorFlowBenchmarkArguments(
            models=[MODEL_ID],
            training=True,
            inference=False,
            sequence_lengths=[8],
            batch_sizes=[1],
            multi_process=False,
        )
        benchmark = TensorFlowBenchmark(benchmark_args)
        results = benchmark.run()
        self.check_results_dict_not_empty(results.time_train_result)
        self.check_results_dict_not_empty(results.memory_train_result)

    def test_train_with_configs(self):
        MODEL_ID = "sshleifer/tiny-gpt2"
        config = AutoConfig.from_pretrained(MODEL_ID)
        benchmark_args = TensorFlowBenchmarkArguments(
            models=[MODEL_ID],
            training=True,
            inference=False,
            sequence_lengths=[8],
            batch_sizes=[1],
            multi_process=False,
        )
        benchmark = TensorFlowBenchmark(benchmark_args, [config])
        results = benchmark.run()
        self.check_results_dict_not_empty(results.time_train_result)
        self.check_results_dict_not_empty(results.memory_train_result)

    def test_inference_encoder_decoder_with_configs(self):
        MODEL_ID = "patrickvonplaten/t5-tiny-random"
        config = AutoConfig.from_pretrained(MODEL_ID)
        benchmark_args = TensorFlowBenchmarkArguments(
            models=[MODEL_ID],
            training=False,
            inference=True,
            sequence_lengths=[8],
            batch_sizes=[1],
            multi_process=False,
        )
        benchmark = TensorFlowBenchmark(benchmark_args, configs=[config])
        results = benchmark.run()
        self.check_results_dict_not_empty(results.time_inference_result)
        self.check_results_dict_not_empty(results.memory_inference_result)

    @unittest.skipIf(is_tf_available()
                     and len(tf.config.list_physical_devices("GPU")) == 0,
                     "Cannot run XLA on CPU.")
    def test_inference_no_configs_xla(self):
        MODEL_ID = "sshleifer/tiny-gpt2"
        benchmark_args = TensorFlowBenchmarkArguments(
            models=[MODEL_ID],
            training=False,
            inference=True,
            sequence_lengths=[8],
            batch_sizes=[1],
            use_xla=True,
            multi_process=False,
        )
        benchmark = TensorFlowBenchmark(benchmark_args)
        results = benchmark.run()
        self.check_results_dict_not_empty(results.time_inference_result)
        self.check_results_dict_not_empty(results.memory_inference_result)

    def test_save_csv_files(self):
        MODEL_ID = "sshleifer/tiny-gpt2"
        with tempfile.TemporaryDirectory() as tmp_dir:
            benchmark_args = TensorFlowBenchmarkArguments(
                models=[MODEL_ID],
                inference=True,
                save_to_csv=True,
                sequence_lengths=[8],
                batch_sizes=[1],
                inference_time_csv_file=os.path.join(tmp_dir, "inf_time.csv"),
                inference_memory_csv_file=os.path.join(tmp_dir, "inf_mem.csv"),
                env_info_csv_file=os.path.join(tmp_dir, "env.csv"),
                multi_process=False,
            )
            benchmark = TensorFlowBenchmark(benchmark_args)
            benchmark.run()
            self.assertTrue(
                Path(os.path.join(tmp_dir, "inf_time.csv")).exists())
            self.assertTrue(
                Path(os.path.join(tmp_dir, "inf_mem.csv")).exists())
            self.assertTrue(Path(os.path.join(tmp_dir, "env.csv")).exists())

    def test_trace_memory(self):
        MODEL_ID = "sshleifer/tiny-gpt2"

        def _check_summary_is_not_empty(summary):
            self.assertTrue(hasattr(summary, "sequential"))
            self.assertTrue(hasattr(summary, "cumulative"))
            self.assertTrue(hasattr(summary, "current"))
            self.assertTrue(hasattr(summary, "total"))

        with tempfile.TemporaryDirectory() as tmp_dir:
            benchmark_args = TensorFlowBenchmarkArguments(
                models=[MODEL_ID],
                inference=True,
                sequence_lengths=[8],
                batch_sizes=[1],
                log_filename=os.path.join(tmp_dir, "log.txt"),
                log_print=True,
                trace_memory_line_by_line=True,
                eager_mode=True,
                multi_process=False,
            )
            benchmark = TensorFlowBenchmark(benchmark_args)
            result = benchmark.run()
            _check_summary_is_not_empty(result.inference_summary)
            self.assertTrue(Path(os.path.join(tmp_dir, "log.txt")).exists())
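
Outside of unittest, the benchmark API exercised above reduces to a few lines; this sketch reuses the exact arguments from the tests (availability of these utilities varies by transformers version):

from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments

args = TensorFlowBenchmarkArguments(
    models=["sshleifer/tiny-gpt2"],
    inference=True,
    training=False,
    sequence_lengths=[8],
    batch_sizes=[1],
    multi_process=False,  # run in-process so the result objects are easy to inspect
)
results = TensorFlowBenchmark(args).run()
print(results.time_inference_result)
print(results.memory_inference_result)
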
class TFXLNetModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        TFXLNetModel,
        TFXLNetLMHeadModel,
        TFXLNetForSequenceClassification,
        TFXLNetForTokenClassification,
        TFXLNetForQuestionAnsweringSimple,
        TFXLNetForMultipleChoice,
    ) if is_tf_available() else ())
    all_generative_model_classes = (
        (TFXLNetLMHeadModel, ) if is_tf_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable
    test_head_masking = False
    test_onnx = False

    def setUp(self):
        self.model_tester = TFXLNetModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=XLNetConfig,
                                          d_inner=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_xlnet_base_model(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_base_model(*config_and_inputs)

    def test_xlnet_lm_head(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_lm_head(*config_and_inputs)

    def test_xlnet_sequence_classif(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_sequence_classif(
            *config_and_inputs)

    def test_xlnet_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_for_token_classification(
            *config_and_inputs)

    def test_xlnet_qa(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_qa(*config_and_inputs)

    def test_xlnet_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlnet_for_multiple_choice(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFXLNetModel.from_pretrained(model_name)
            self.assertIsNotNone(model)

    # overwrite since `TFXLNetLMHeadModel` doesn't cut logits/labels
    def test_loss_computation(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        for model_class in self.all_model_classes:
            model = model_class(config)
            if getattr(model, "hf_compute_loss", None):
                # The number of elements in the loss should be the same as the number of elements in the label
                prepared_for_class = self._prepare_for_class(
                    inputs_dict.copy(), model_class, return_labels=True)
                added_label = prepared_for_class[sorted(
                    list(prepared_for_class.keys() - inputs_dict.keys()),
                    reverse=True)[0]]
                loss_size = tf.size(added_label)

                # `TFXLNetLMHeadModel` doesn't cut logits/labels
                # if model.__class__ in get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING):
                #     # if loss is causal lm loss, labels are shift, so that one label per batch
                #     # is cut
                #     loss_size = loss_size - self.model_tester.batch_size

                # Test that model correctly compute the loss with kwargs
                prepared_for_class = self._prepare_for_class(
                    inputs_dict.copy(), model_class, return_labels=True)
                input_name = "input_ids" if "input_ids" in prepared_for_class else "pixel_values"
                input_ids = prepared_for_class.pop(input_name)

                loss = model(input_ids, **prepared_for_class)[0]
                self.assertEqual(loss.shape, [loss_size])

                # Test that model correctly compute the loss with a dict
                prepared_for_class = self._prepare_for_class(
                    inputs_dict.copy(), model_class, return_labels=True)
                loss = model(prepared_for_class)[0]
                self.assertEqual(loss.shape, [loss_size])

                # Test that model correctly compute the loss with a tuple
                prepared_for_class = self._prepare_for_class(
                    inputs_dict.copy(), model_class, return_labels=True)

                # Get keys that were added with the _prepare_for_class function
                label_keys = prepared_for_class.keys() - inputs_dict.keys()
                signature = inspect.signature(model.call).parameters
                signature_names = list(signature.keys())

                # Create a dictionary holding the location of the tensors in the tuple
                tuple_index_mapping = {0: input_name}
                for label_key in label_keys:
                    label_key_index = signature_names.index(label_key)
                    tuple_index_mapping[label_key_index] = label_key
                sorted_tuple_index_mapping = sorted(
                    tuple_index_mapping.items())
                # Initialize a list with their default values, update the values and convert to a tuple
                list_input = []

                for name in signature_names:
                    if name != "kwargs":
                        list_input.append(signature[name].default)

                for index, value in sorted_tuple_index_mapping:
                    list_input[index] = prepared_for_class[value]

                tuple_input = tuple(list_input)

                # Send to model
                loss = model(tuple_input[:-1])[0]

                self.assertEqual(loss.shape, [loss_size])
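
The tuple-input construction in test_loss_computation is the subtle part: labels are placed positionally by reading the signature of model.call. The same idea in isolation, with a toy function standing in for the model:

import inspect

def call(input_ids=None, attention_mask=None, labels=None):
    return input_ids, attention_mask, labels

signature = inspect.signature(call).parameters
list_input = [signature[name].default for name in signature]  # start from the defaults
prepared = {"input_ids": [1, 2, 3], "labels": [0, 1, 0]}
for name, value in prepared.items():
    list_input[list(signature).index(name)] = value  # drop each value into its slot
print(call(*list_input))  # ([1, 2, 3], None, [0, 1, 0])
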
class PerformerAttentionTest(unittest.TestCase):
    # Check that setting attention_type='performer' actually makes the model use (TF)PerformerAttention
    def test_performer_models(self):
        def _model_is_tf(model_cls):
            return is_tf_available() and issubclass(model_cls,
                                                    tf.keras.layers.Layer)

        for model_class, model_config in performer_supporting_models_and_configs(
        ):
            try:
                model = model_class(model_config(attention_type='performer'))

            # The TapasModel requires the torch-scatter library, and we shouldn't fail this test just because
            # the user doesn't have that library installed
            except ImportError:
                pass
            else:
                with self.subTest(model=model_class):
                    self.assertIsNotNone(model)

                    # Recursively iterating over sublayers is non-trivial in
                    # TensorFlow, so we skip this check for TF models
                    if not _model_is_tf(model_class):
                        self.assertTrue(
                            any((isinstance(module, PerformerAttention)
                                 for module in model.modules())))

    @require_torch
    def test_output_shape_pytorch(self):
        self._test_output_shape_for_library('pt')

    @require_tf
    def test_output_shape_tensorflow(self):
        self._test_output_shape_for_library('tf')

    @unittest.skipUnless(
        _run_nondeterministic_tests,
        "This can fail randomly if we draw an 'unlucky' set of features.")
    def test_softmax_noncausal_attention_output_pytorch(self):
        self._test_softmax_noncausal_attention_output_for_library('pt')

    @unittest.skipUnless(
        _run_nondeterministic_tests,
        "This can fail randomly if we draw an 'unlucky' set of features.")
    def test_softmax_noncausal_attention_output_tensorflow(self):
        self._test_softmax_noncausal_attention_output_for_library('tf')

    @unittest.skipUnless(is_torch_available() and is_tf_available(),
                         "Both PyTorch and TensorFlow must be available")
    @torch.no_grad()
    def test_pytorch_tensorflow_parity(self):
        for config, batch, seq_len in self._iterate_config_options():
            # This option leads to random test failures due to the TFPerformerAttention object randomly redrawing
            # features right after we set its features to be equal to those of the PyTorch object, so we just skip it
            if config.redraw_stochastically:
                continue

            try:
                pt_attention = PerformerAttention(config)
            except AssertionError:
                continue

            try:
                tf_attention = TFPerformerAttention(config)
            except AssertionError:
                continue

            # Copy the weights from the PyTorch object to the TensorFlow one
            for name, param in pt_attention.named_parameters():
                pt_value = param.data.numpy()

                # Get the corresponding param (tf.Variable) in the TensorFlow object
                obj = tf_attention
                for key in name.split('.'):
                    if key.isnumeric():
                        obj = obj[int(key)]
                    elif key == "weight":
                        # Note that we have to transpose the weights when converting to TF to get the same output
                        obj.kernel_initializer = tf.constant_initializer(
                            pt_value.T)
                    elif key == "bias":
                        obj.bias_initializer = tf.constant_initializer(
                            pt_value)
                    else:
                        obj = getattr(obj, key)

            # Test that the two modules produce the same output, within numerical error
            with self.subTest(**config.to_dict()):
                q, k, v = (torch.randn(batch, seq_len, config.d_model)
                           for _ in range(3))
                tf_q, tf_k, tf_v = (tf.constant(x.numpy()) for x in (q, k, v))
                pt_output = pt_attention(q, k, v)[0]

                tf_attention.random_features = tf.constant(
                    pt_attention.random_features.numpy())
                tf_output = tf_attention(tf_q, tf_k, tf_v)[0]

                self.assertTrue(
                    np.allclose(pt_output.numpy(),
                                tf_output.numpy(),
                                atol=2e-4))
                self.assertListEqual(list(pt_output.shape),
                                     [batch, seq_len, config.d_model])

    # Exhaustive grid search of possible config options (and a random search of batch sizes and seq lengths)
    @staticmethod
    def _iterate_config_options(
    ) -> Iterator[Tuple[PerformerAttentionConfig, int, int]]:
        param_names = ['kernel_type', 'orthogonal_feature_algorithm']
        legal_values = [PerformerKernel, OrthogonalFeatureAlgorithm
                        ]  # Enum classes are iterable

        # Get all boolean config options
        for x in fields(PerformerAttentionConfig):
            if x.type == bool:
                legal_values.append((False, True))
                param_names.append(x.name)

        for values in product(*legal_values):
            kwargs = dict(zip(param_names, values))

            d_model = random.randint(2, 10)
            batch_size = random.randint(1, 4)
            num_heads = random.choice([
                i for i in range(1, d_model) if not d_model % i
            ])  # Factors of d_model
            length = 1 if kwargs.get(
                'use_recurrent_decoding') else random.randint(1, 10)
            yield PerformerAttentionConfig(d_model=d_model,
                                           num_heads=num_heads,
                                           **kwargs), batch_size, length

    @torch.no_grad()
    def _test_output_shape_for_library(self, library: str = 'pt'):
        for config, batch_size, length in self._iterate_config_options():
            d_model = config.d_model

            # PyTorch specific stuff
            if library == 'pt':
                attn_class = PerformerAttention

                def rand_tensor_func():
                    return torch.randn(batch_size, length, d_model)

            # TensorFlow specific stuff
            else:
                attn_class = TFPerformerAttention

                def rand_tensor_func():
                    return tf.random.normal((batch_size, length, d_model))

            try:
                attention = attn_class(config)
            except AssertionError:
                # Skip illegal kwargs combinations
                pass
            else:
                with self.subTest(**config.to_dict()):
                    q, k, v = [rand_tensor_func() for _ in range(3)]
                    output = attention(q, k, v)[0]

                    self.assertListEqual(list(output.shape),
                                         [batch_size, length, d_model])

    def _test_softmax_noncausal_attention_output_for_library(
            self, library: str = 'pt'):
        batch_size = 1
        length = 10
        num_heads = 1
        dim = 10

        config = PerformerAttentionConfig(d_model=dim,
                                          num_heads=num_heads,
                                          kernel_type='exp',
                                          num_random_features=30000,
                                          use_linear_layers=False)
        # PyTorch-specific stuff
        if library == 'pt':
            pt_attention = PerformerAttention(config)

            q, k, v = [torch.randn(batch_size, length, dim) for _ in range(3)]
            performer_attention_output = pt_attention(q, k, v)[0]

            attention_scores = q @ k.transpose(-2, -1) / math.sqrt(float(dim))
            # Softmax over the key axis, as in standard attention
            attention_scores = torch.nn.functional.softmax(attention_scores,
                                                           dim=-1)

        # TensorFlow-specific stuff
        else:
            tf_attention = TFPerformerAttention(config)

            q, k, v = [
                tf.random.normal((batch_size, length, dim)) for _ in range(3)
            ]
            performer_attention_output = tf_attention(q, k, v)[0]

            attention_scores = q @ tf.linalg.matrix_transpose(k) / math.sqrt(
                float(dim))
            attention_scores = tf.nn.softmax(attention_scores, axis=-1)

        softmax_output = (attention_scores @ v).numpy()

        errors = softmax_output - performer_attention_output.numpy()
        mse = np.mean(errors**2)
        bias = np.mean(errors)

        self.assertLess(mse, 0.1)
        self.assertLess(np.abs(bias), 0.025)
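
For reference, the exact softmax attention that _test_softmax_noncausal_attention_output_for_library compares against, as a self-contained NumPy sketch with the same shapes and 1/sqrt(d) scaling as the test:

import math
import numpy as np

def exact_softmax_attention(q, k, v):
    scores = q @ k.transpose(0, 2, 1) / math.sqrt(q.shape[-1])
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights /= weights.sum(axis=-1, keepdims=True)  # softmax over the key axis
    return weights @ v

rng = np.random.default_rng(0)
q, k, v = (rng.standard_normal((1, 10, 10)) for _ in range(3))
print(exact_softmax_attention(q, k, v).shape)  # (1, 10, 10)
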
class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        TFMobileBertModel,
        TFMobileBertForMaskedLM,
        TFMobileBertForNextSentencePrediction,
        TFMobileBertForPreTraining,
        TFMobileBertForQuestionAnswering,
        TFMobileBertForSequenceClassification,
        TFMobileBertForTokenClassification,
        TFMobileBertForMultipleChoice,
    ) if is_tf_available() else ())
    test_head_masking = False
    test_onnx = False

    class TFMobileBertModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            embedding_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope
            self.embedding_size = embedding_size

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length],
                                   self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length],
                                        vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                            self.type_vocab_size)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size],
                                             self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length],
                                          self.num_labels)
                choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = MobileBertConfig(
                vocab_size=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range,
                embedding_size=self.embedding_size,
            )

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

        def create_and_check_mobilebert_model(self, config, input_ids,
                                              token_type_ids, input_mask,
                                              sequence_labels, token_labels,
                                              choice_labels):
            model = TFMobileBertModel(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            result = model(inputs)

            inputs = [input_ids, input_mask]
            result = model(inputs)

            result = model(input_ids)

            self.parent.assertEqual(
                result.last_hidden_state.shape,
                (self.batch_size, self.seq_length, self.hidden_size))
            self.parent.assertEqual(result.pooler_output.shape,
                                    (self.batch_size, self.hidden_size))

        def create_and_check_mobilebert_for_masked_lm(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            model = TFMobileBertForMaskedLM(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            result = model(inputs)
            self.parent.assertEqual(
                result.logits.shape,
                (self.batch_size, self.seq_length, self.vocab_size))

        def create_and_check_mobilebert_for_next_sequence_prediction(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            model = TFMobileBertForNextSentencePrediction(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            result = model(inputs)
            self.parent.assertEqual(result.logits.shape, (self.batch_size, 2))

        def create_and_check_mobilebert_for_pretraining(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            model = TFMobileBertForPreTraining(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            result = model(inputs)
            self.parent.assertEqual(
                result.prediction_logits.shape,
                (self.batch_size, self.seq_length, self.vocab_size))
            self.parent.assertEqual(result.seq_relationship_logits.shape,
                                    (self.batch_size, 2))

        def create_and_check_mobilebert_for_sequence_classification(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            config.num_labels = self.num_labels
            model = TFMobileBertForSequenceClassification(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            result = model(inputs)
            self.parent.assertEqual(result.logits.shape,
                                    (self.batch_size, self.num_labels))

        def create_and_check_mobilebert_for_multiple_choice(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            config.num_choices = self.num_choices
            model = TFMobileBertForMultipleChoice(config=config)
            multiple_choice_input_ids = tf.tile(tf.expand_dims(input_ids, 1),
                                                (1, self.num_choices, 1))
            multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1),
                                                 (1, self.num_choices, 1))
            multiple_choice_token_type_ids = tf.tile(
                tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
            inputs = {
                "input_ids": multiple_choice_inputs_ids,
                "attention_mask": multiple_choice_input_mask,
                "token_type_ids": multiple_choice_token_type_ids,
            }
            result = model(inputs)
            self.parent.assertEqual(result.logits.shape,
                                    (self.batch_size, self.num_choices))

        def create_and_check_mobilebert_for_token_classification(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            config.num_labels = self.num_labels
            model = TFMobileBertForTokenClassification(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            result = model(inputs)
            self.parent.assertEqual(
                result.logits.shape,
                (self.batch_size, self.seq_length, self.num_labels))

        def create_and_check_mobilebert_for_question_answering(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            model = TFMobileBertForQuestionAnswering(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            result = model(inputs)
            self.parent.assertEqual(result.start_logits.shape,
                                    (self.batch_size, self.seq_length))
            self.parent.assertEqual(result.end_logits.shape,
                                    (self.batch_size, self.seq_length))

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs
            inputs_dict = {
                "input_ids": input_ids,
                "token_type_ids": token_type_ids,
                "attention_mask": input_mask
            }
            return config, inputs_dict

    def setUp(self):
        self.model_tester = TFMobileBertModelTest.TFMobileBertModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=MobileBertConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_mobilebert_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_mobilebert_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_mobilebert_for_masked_lm(
            *config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_mobilebert_for_multiple_choice(
            *config_and_inputs)

    def test_for_next_sequence_prediction(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_mobilebert_for_next_sequence_prediction(
            *config_and_inputs)

    def test_for_pretraining(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_mobilebert_for_pretraining(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_mobilebert_for_question_answering(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_mobilebert_for_sequence_classification(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_mobilebert_for_token_classification(
            *config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        list_lm_models = [TFMobileBertForMaskedLM, TFMobileBertForPreTraining]

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(),
                              tf.keras.layers.Layer)

            if model_class in list_lm_models:
                x = model.get_output_embeddings()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_bias()
                assert isinstance(name, dict)
                for k, v in name.items():
                    assert isinstance(v, tf.Variable)
            else:
                x = model.get_output_embeddings()
                assert x is None
                name = model.get_bias()
                assert name is None

    def test_saved_model_creation(self):
        # This test is too long (> 30 sec) and makes the CI fail
        pass

    def test_mixed_precision(self):
        # TODO JP: Make MobileBert float16 compliant
        pass

    @slow
    def test_model_from_pretrained(self):
        # for model_name in TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
        for model_name in ["google/mobilebert-uncased"]:
            model = TFMobileBertModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
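
The multiple-choice input construction used by the tester above, reduced to a quick shape check:

import tensorflow as tf

batch_size, seq_length, num_choices = 13, 7, 4
input_ids = tf.ones((batch_size, seq_length), dtype=tf.int32)
tiled = tf.tile(tf.expand_dims(input_ids, 1), (1, num_choices, 1))
print(tiled.shape)  # (13, 4, 7): one copy of the sequence per answer choice
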
Example #19
class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = ((TFRobertaModel, TFRobertaForMaskedLM,
                          TFRobertaForSequenceClassification)
                         if is_tf_available() else ())

    class TFRobertaModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length],
                                   self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length],
                                        vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                            self.type_vocab_size)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size],
                                             self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length],
                                          self.num_labels)
                choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = RobertaConfig(
                vocab_size=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range,
            )

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

        def create_and_check_roberta_model(self, config, input_ids,
                                           token_type_ids, input_mask,
                                           sequence_labels, token_labels,
                                           choice_labels):
            model = TFRobertaModel(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            sequence_output = model(inputs)[0]

            inputs = [input_ids, input_mask]
            sequence_output = model(inputs)[0]

            sequence_output = model(input_ids)[0]

            result = {
                "sequence_output": sequence_output.numpy(),
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].shape),
                [self.batch_size, self.seq_length, self.hidden_size])

        def create_and_check_roberta_for_masked_lm(self, config, input_ids,
                                                   token_type_ids, input_mask,
                                                   sequence_labels,
                                                   token_labels,
                                                   choice_labels):
            model = TFRobertaForMaskedLM(config=config)
            prediction_scores = model([input_ids, input_mask,
                                       token_type_ids])[0]
            result = {
                "prediction_scores": prediction_scores.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape),
                [self.batch_size, self.seq_length, self.vocab_size])

        def create_and_check_roberta_for_token_classification(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            config.num_labels = self.num_labels
            model = TFRobertaForTokenClassification(config=config)
            inputs = {
                "input_ids": input_ids,
                "attention_mask": input_mask,
                "token_type_ids": token_type_ids
            }
            (logits, ) = model(inputs)
            result = {
                "logits": logits.numpy(),
            }
            self.parent.assertListEqual(
                list(result["logits"].shape),
                [self.batch_size, self.seq_length, self.num_labels])

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs
            inputs_dict = {
                "input_ids": input_ids,
                "token_type_ids": token_type_ids,
                "attention_mask": input_mask
            }
            return config, inputs_dict

    def setUp(self):
        self.model_tester = TFRobertaModelTest.TFRobertaModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=RobertaConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_roberta_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_for_masked_lm(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_roberta_for_token_classification(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in list(
                TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = TFRobertaModel.from_pretrained(model_name,
                                                   cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)
Example #20
class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        TFRobertaModel,
        TFRobertaForCausalLM,
        TFRobertaForMaskedLM,
        TFRobertaForSequenceClassification,
        TFRobertaForTokenClassification,
        TFRobertaForQuestionAnswering,
    ) if is_tf_available() else ())
    test_head_masking = False
    test_onnx = False

    def setUp(self):
        self.model_tester = TFRobertaModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=RobertaConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        """Test the base model"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_causal_lm_base_model(self):
        """Test the base model of the causal LM model

        is_decoder=True, no cross_attention, no encoder outputs
        """
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_causal_lm_base_model(
            *config_and_inputs)

    def test_model_as_decoder(self):
        """Test the base model as a decoder (of an encoder-decoder architecture)

        is_decoder=True + cross_attention + pass encoder outputs
        """
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder(
        )
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_causal_lm(self):
        """Test the causal LM model"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_causal_lm_model(*config_and_inputs)

    def test_causal_lm_model_as_decoder(self):
        """Test the causal LM model as a decoder"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder(
        )
        self.model_tester.create_and_check_causal_lm_model_as_decoder(
            *config_and_inputs)

    def test_causal_lm_model_past(self):
        """Test causal LM model with `past_key_values`"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_causal_lm_model_past(
            *config_and_inputs)

    def test_causal_lm_model_past_with_attn_mask(self):
        """Test the causal LM model with `past_key_values` and `attention_mask`"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_causal_lm_model_past_with_attn_mask(
            *config_and_inputs)

    def test_causal_lm_model_past_with_large_inputs(self):
        """Test the causal LM model with `past_key_values` and a longer decoder sequence length"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_causal_lm_model_past_large_inputs(
            *config_and_inputs)

    def test_decoder_model_past_with_large_inputs(self):
        """Similar to `test_causal_lm_model_past_with_large_inputs` but with cross-attention"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder(
        )
        self.model_tester.create_and_check_decoder_model_past_large_inputs(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_token_classification(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(
            *config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_multiple_choice(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFRobertaModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
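
The decoder-mode tests above hinge on two config flags; a minimal sketch of the configuration they describe (flag names follow transformers' PretrainedConfig, which RobertaConfig inherits):

from transformers import RobertaConfig

decoder_config = RobertaConfig(
    vocab_size=99,
    hidden_size=32,
    num_hidden_layers=5,
    num_attention_heads=4,
    is_decoder=True,           # causal masking: each position sees only its past
    add_cross_attention=True,  # attend to encoder outputs passed at call time
)
print(decoder_config.is_decoder, decoder_config.add_cross_attention)
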
class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        (TFGPT2Model, TFGPT2LMHeadModel, TFGPT2ForSequenceClassification, TFGPT2DoubleHeadsModel)
        if is_tf_available()
        else ()
    )
    all_generative_model_classes = (TFGPT2LMHeadModel,) if is_tf_available() else ()
    test_head_masking = False
    test_onnx = True
    onnx_min_opset = 10

    def setUp(self):
        self.model_tester = TFGPT2ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_gpt2_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model(*config_and_inputs)

    def test_gpt2_model_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model_past(*config_and_inputs)

    def test_gpt2_model_att_mask_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model_attention_mask_past(*config_and_inputs)

    def test_gpt2_model_past_large_inputs(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model_past_large_inputs(*config_and_inputs)

    def test_gpt2_lm_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_lm_head(*config_and_inputs)

    def test_gpt2_double_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_double_head(*config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)

            if model_class in self.all_generative_model_classes:
                x = model.get_output_embeddings()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_bias()
                assert name is None
            else:
                x = model.get_output_embeddings()
                assert x is None
                name = model.get_bias()
                assert name is None

    def test_gpt2_sequence_classification_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_for_sequence_classification(*config_and_inputs)

    def test_mixed_precision(self):
        # TODO JP: Make GPT2 float16 compliant
        pass

    def test_xla_mode(self):
        # TODO JP: Make GPT2 XLA compliant
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_GPT2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFGPT2Model.from_pretrained(model_name)
            self.assertIsNotNone(model)
    def __call__(self,
                 text=None,
                 images=None,
                 padding="max_length",
                 return_tensors="np",
                 **kwargs):
        """
        Main method to prepare for the model one or several text(s) and image(s). This method forwards the `text` and
        `kwargs` arguments to CLIPTokenizerFast's [`~CLIPTokenizerFast.__call__`] if `text` is not `None` to encode:
        the text. To prepare the image(s), this method forwards the `images` and `kwrags` arguments to
        CLIPFeatureExtractor's [`~CLIPFeatureExtractor.__call__`] if `images` is not `None`. Please refer to the
        doctsring of the above two methods for more information.

        Args:
            text (`str`, `List[str]`, `List[List[str]]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
            images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`,
            `List[torch.Tensor]`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. In the case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C
                is the number of channels and H and W are the image height and width.
            return_tensors (`str` or [`~utils.TensorType`], *optional*):
                If set, will return tensors of a particular framework. Acceptable values are:
                - `'tf'`: Return TensorFlow `tf.constant` objects.
                - `'pt'`: Return PyTorch `torch.Tensor` objects.
                - `'np'`: Return NumPy `np.ndarray` objects.
                - `'jax'`: Return JAX `jnp.ndarray` objects.
        Returns:
            [`BatchEncoding`]: A [`BatchEncoding`] with the following fields:
            - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
              `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
              `None`).
            - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
        """

        if text is None and images is None:
            raise ValueError(
                "You have to specify at least one text or image. Both cannot be none."
            )

        if text is not None:
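            # A single string, or a flat list of strings, can be handled with one tokenizer call.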
            if isinstance(text, str) or (isinstance(text, List)
                                         and not isinstance(text[0], List)):
                encodings = [
                    self.tokenizer(text,
                                   padding=padding,
                                   return_tensors=return_tensors,
                                   **kwargs)
                ]

            elif isinstance(text, List) and isinstance(text[0], List):
                encodings = []

                # Maximum number of queries across batch
                max_num_queries = max([len(t) for t in text])

                # Pad all batch samples to max number of text queries
                for t in text:
                    if len(t) != max_num_queries:
                        t = t + [" "] * (max_num_queries - len(t))

                    encoding = self.tokenizer(t,
                                              padding=padding,
                                              return_tensors=return_tensors,
                                              **kwargs)
                    encodings.append(encoding)
            else:
                raise TypeError(
                    "Input text should be a string, a list of strings or a nested list of strings"
                )

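            # Merge the per-sample encodings into batched tensors in the requested framework.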
            if return_tensors == "np":
                input_ids = np.concatenate(
                    [encoding["input_ids"] for encoding in encodings], axis=0)
                attention_mask = np.concatenate(
                    [encoding["attention_mask"] for encoding in encodings],
                    axis=0)

            elif return_tensors == "jax" and is_flax_available():
                import jax.numpy as jnp

                input_ids = jnp.concatenate(
                    [encoding["input_ids"] for encoding in encodings], axis=0)
                attention_mask = jnp.concatenate(
                    [encoding["attention_mask"] for encoding in encodings],
                    axis=0)

            elif return_tensors == "pt" and is_torch_available():
                import torch

                input_ids = torch.cat(
                    [encoding["input_ids"] for encoding in encodings], dim=0)
                attention_mask = torch.cat(
                    [encoding["attention_mask"] for encoding in encodings],
                    dim=0)

            elif return_tensors == "tf" and is_tf_available():
                import tensorflow as tf

                input_ids = tf.stack(
                    [encoding["input_ids"] for encoding in encodings], axis=0)
                attention_mask = tf.stack(
                    [encoding["attention_mask"] for encoding in encodings],
                    axis=0)

            else:
                raise ValueError(
                    "Target return tensor type could not be returned")

            encoding = BatchEncoding()
            encoding["input_ids"] = input_ids
            encoding["attention_mask"] = attention_mask

        if images is not None:
            image_features = self.feature_extractor(
                images, return_tensors=return_tensors, **kwargs)

        if text is not None and images is not None:
            encoding["pixel_values"] = image_features.pixel_values
            return encoding
        elif text is not None:
            return encoding
        else:
            return BatchEncoding(data=dict(**image_features),
                                 tensor_type=return_tensors)
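A minimal usage sketch of the `__call__` method above. Since the snippet does not name the class this method belongs to, `processor` is assumed to be an instance of it; the dummy image and query strings are illustrative only:

# Hypothetical usage of the processor defined above. `processor` is assumed to be an
# instance of the class that owns __call__ (built from a CLIP tokenizer and a CLIP
# feature extractor); it is not constructed here because the snippet does not name it.
import numpy as np
from PIL import Image

dummy_image = Image.fromarray(np.zeros((224, 224, 3), dtype=np.uint8))  # blank RGB image

# One inner list of text queries per image in the batch.
texts = [["a photo of a cat", "a photo of a dog"]]

inputs = processor(text=texts, images=dummy_image, return_tensors="np")
print(inputs["input_ids"].shape)     # (batch_size * num_queries, sequence_length)
print(inputs["pixel_values"].shape)  # (batch_size, num_channels, height, width)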
Example #23
0
class TFLongformerModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        TFLongformerModel,
        TFLongformerForMaskedLM,
        TFLongformerForQuestionAnswering,
        TFLongformerForSequenceClassification,
        TFLongformerForMultipleChoice,
        TFLongformerForTokenClassification,
    ) if is_tf_available() else ())

    def setUp(self):
        self.model_tester = TFLongformerModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=LongformerConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model_attention_mask_determinism(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_attention_mask_determinism(
            *config_and_inputs)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_model_global_attention_mask(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model_with_global_attention_mask(
            *config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_question_answering(
        )
        self.model_tester.create_and_check_for_question_answering(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_sequence_classification(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_token_classification(
            *config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_multiple_choice(
            *config_and_inputs)

    @slow
    def test_saved_model_with_attentions_output(self):
        # Longformer has special attentions which are not
        # compatible with graph mode
        pass

    def test_saved_model_creation(self):
        # This test is too long (>30s) and makes the CI fail
        pass
Example #24
0
class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        (TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel) if is_tf_available() else ()
    )
    all_generative_model_classes = (
        (TFOpenAIGPTLMHeadModel,) if is_tf_available() else ()
    )  # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly

    class TFOpenAIGPTModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_token_type_ids=True,
            use_input_mask=True,
            use_labels=True,
            use_mc_token_ids=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_token_type_ids = use_token_type_ids
            self.use_input_mask = use_input_mask
            self.use_labels = use_labels
            self.use_mc_token_ids = use_mc_token_ids
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

            mc_token_ids = None
            if self.use_mc_token_ids:
                mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
                choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = OpenAIGPTConfig(
                vocab_size=self.vocab_size,
                n_embd=self.hidden_size,
                n_layer=self.num_hidden_layers,
                n_head=self.num_attention_heads,
                # intermediate_size=self.intermediate_size,
                # hidden_act=self.hidden_act,
                # hidden_dropout_prob=self.hidden_dropout_prob,
                # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                n_positions=self.max_position_embeddings,
                n_ctx=self.max_position_embeddings
                # type_vocab_size=self.type_vocab_size,
                # initializer_range=self.initializer_range
            )

            head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

            return (
                config,
                input_ids,
                input_mask,
                head_mask,
                token_type_ids,
                mc_token_ids,
                sequence_labels,
                token_labels,
                choice_labels,
            )

        def create_and_check_openai_gpt_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
            model = TFOpenAIGPTModel(config=config)
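            # Call the model with dict, list, and bare-tensor inputs to cover the supported input formats.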
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            sequence_output = model(inputs)[0]

            inputs = [input_ids, input_mask]
            sequence_output = model(inputs)[0]

            sequence_output = model(input_ids)[0]

            result = {
                "sequence_output": sequence_output.numpy(),
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
            )

        def create_and_check_openai_gpt_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
            model = TFOpenAIGPTLMHeadModel(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            prediction_scores = model(inputs)[0]
            result = {
                "prediction_scores": prediction_scores.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
            )

        def create_and_check_openai_gpt_double_head(
            self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args
        ):
            model = TFOpenAIGPTDoubleHeadsModel(config=config)

            multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
            multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
            multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))

            inputs = {
                "input_ids": multiple_choice_inputs_ids,
                "mc_token_ids": mc_token_ids,
                "attention_mask": multiple_choice_input_mask,
                "token_type_ids": multiple_choice_token_type_ids,
            }
            lm_logits, mc_logits = model(inputs)[:2]
            result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()}
            self.parent.assertListEqual(
                list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size]
            )
            self.parent.assertListEqual(list(result["mc_logits"].shape), [self.batch_size, self.num_choices])

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()

            (
                config,
                input_ids,
                input_mask,
                head_mask,
                token_type_ids,
                mc_token_ids,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs

            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
            return config, inputs_dict

    def setUp(self):
        self.model_tester = TFOpenAIGPTModelTest.TFOpenAIGPTModelTester(self)
        self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_openai_gpt_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_openai_gpt_model(*config_and_inputs)

    def test_openai_gpt_lm_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_openai_gpt_lm_head(*config_and_inputs)

    def test_openai_gpt_double_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_openai_gpt_double_head(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in list(TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = TFOpenAIGPTModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)
Example #25
0
class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = ((TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel,
                          TFOpenAIGPTDoubleHeadsModel,
                          TFOpenAIGPTForSequenceClassification)
                         if is_tf_available() else ())
    all_generative_model_classes = (
        (TFOpenAIGPTLMHeadModel, ) if is_tf_available() else ()
    )  # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly
    test_head_masking = False

    def setUp(self):
        self.model_tester = TFOpenAIGPTModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=OpenAIGPTConfig,
                                          n_embd=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_openai_gpt_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_openai_gpt_model(*config_and_inputs)

    def test_openai_gpt_lm_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_openai_gpt_lm_head(
            *config_and_inputs)

    def test_openai_gpt_double_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_openai_gpt_double_head(
            *config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(),
                              tf.keras.layers.Layer)

            if model_class in self.all_generative_model_classes:
                x = model.get_output_embeddings()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_bias()
                assert name is None
            else:
                x = model.get_output_embeddings()
                assert x is None
                name = model.get_bias()
                assert name is None

    def test_openai_gpt_sequence_classification_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_openai_gpt_for_sequence_classification(
            *config_and_inputs)

    def test_mixed_precision(self):
        # TODO JP: Make OpenAIGPT float16 compliant
        pass

    def test_xla_mode(self):
        # TODO JP: Make OpenAIGPT XLA compliant
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFOpenAIGPTModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
Example #26
0
class TFLEDModelTest(TFModelTesterMixin, unittest.TestCase):
    all_model_classes = (TFLEDForConditionalGeneration,
                         TFLEDModel) if is_tf_available() else ()
    all_generative_model_classes = (
        TFLEDForConditionalGeneration, ) if is_tf_available() else ()
    is_encoder_decoder = True
    test_pruning = False
    test_head_masking = False
    test_onnx = False

    def setUp(self):
        self.model_tester = TFLEDModelTester(self)
        self.config_tester = ConfigTester(self, config_class=LEDConfig)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_decoder_model_past_large_inputs(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common(
        )
        self.model_tester.check_decoder_model_past_large_inputs(
            *config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

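        # LED keeps a final_logits_bias on its generation head, so get_bias() is
        # expected to return a dict of tf.Variables for generative classes.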
        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(),
                              tf.keras.layers.Layer)

            if model_class in self.all_generative_model_classes:
                x = model.get_output_embeddings()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_bias()
                assert isinstance(name, dict)
                for k, v in name.items():
                    assert isinstance(v, tf.Variable)
            else:
                x = model.get_output_embeddings()
                assert x is None
                name = model.get_bias()
                assert name is None

    def test_resize_token_embeddings(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        def _get_word_embedding_weight(model, embedding_layer):
            if hasattr(embedding_layer, "weight"):
                return embedding_layer.weight
            else:
                # Build the word embedding weights if they don't exist yet,
                # then retry fetching the attribute once built.
                model(model.dummy_inputs)
                if hasattr(embedding_layer, "weight"):
                    return embedding_layer.weight
                else:
                    return None

        for model_class in self.all_model_classes:
            for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
                # build the embeddings
                model = model_class(config=config)
                old_input_embeddings = _get_word_embedding_weight(
                    model, model.get_input_embeddings())
                old_output_embeddings = _get_word_embedding_weight(
                    model, model.get_output_embeddings())
                old_final_logits_bias = model.get_bias()

                # reshape the embeddings
                model.resize_token_embeddings(size)
                new_input_embeddings = _get_word_embedding_weight(
                    model, model.get_input_embeddings())
                new_output_embeddings = _get_word_embedding_weight(
                    model, model.get_output_embeddings())
                new_final_logits_bias = model.get_bias()

                # check that the resized embeddings size matches the desired size.
                assert_size = size if size is not None else config.vocab_size

                self.assertEqual(new_input_embeddings.shape[0], assert_size)

                # check that weights remain the same after resizing
                models_equal = True
                for p1, p2 in zip(old_input_embeddings.value(),
                                  new_input_embeddings.value()):
                    if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
                        models_equal = False
                self.assertTrue(models_equal)

                if old_output_embeddings is not None and new_output_embeddings is not None:
                    self.assertEqual(new_output_embeddings.shape[0],
                                     assert_size)

                    models_equal = True
                    for p1, p2 in zip(old_output_embeddings.value(),
                                      new_output_embeddings.value()):
                        if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
                            models_equal = False
                    self.assertTrue(models_equal)

                if old_final_logits_bias is not None and new_final_logits_bias is not None:
                    old_final_logits_bias = old_final_logits_bias[
                        "final_logits_bias"]
                    new_final_logits_bias = new_final_logits_bias[
                        "final_logits_bias"]
                    self.assertEqual(new_final_logits_bias.shape[0], 1)
                    self.assertEqual(new_final_logits_bias.shape[1],
                                     assert_size)

                    models_equal = True
                    for old, new in zip(old_final_logits_bias.value(),
                                        new_final_logits_bias.value()):
                        for p1, p2 in zip(old, new):
                            if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
                                models_equal = False
                    self.assertTrue(models_equal)

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        inputs_dict["global_attention_mask"] = tf.zeros_like(
            inputs_dict["attention_mask"])
        num_global_attn_indices = 2
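        # Mark the first num_global_attn_indices tokens of every sequence as global-attention tokens.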
        inputs_dict["global_attention_mask"] = tf.where(
            tf.range(self.model_tester.seq_length)[None, :] <
            num_global_attn_indices,
            1,
            inputs_dict["global_attention_mask"],
        )

        config.return_dict = True
        seq_length = self.model_tester.seq_length
        encoder_seq_length = self.model_tester.encoder_seq_length

        def check_decoder_attentions_output(outputs):
            decoder_attentions = outputs.decoder_attentions
            self.assertEqual(len(decoder_attentions),
                             self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(decoder_attentions[0].shape[-3:]),
                [
                    self.model_tester.num_attention_heads, seq_length,
                    seq_length
                ],
            )

        def check_encoder_attentions_output(outputs):
            attentions = [t.numpy() for t in outputs.encoder_attentions]
            global_attentions = [
                t.numpy() for t in outputs.encoder_global_attentions
            ]
            self.assertEqual(len(attentions),
                             self.model_tester.num_hidden_layers)
            self.assertEqual(len(global_attentions),
                             self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [
                    self.model_tester.num_attention_heads, encoder_seq_length,
                    seq_length
                ],
            )
            self.assertListEqual(
                list(global_attentions[0].shape[-3:]),
                [
                    self.model_tester.num_attention_heads, encoder_seq_length,
                    num_global_attn_indices
                ],
            )

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["use_cache"] = False
            config.output_hidden_states = False
            model = model_class(config)
            outputs = model(self._prepare_for_class(inputs_dict, model_class))
            out_len = len(outputs)
            self.assertEqual(config.output_hidden_states, False)
            check_encoder_attentions_output(outputs)

            if self.is_encoder_decoder:
                model = model_class(config)
                outputs = model(
                    self._prepare_for_class(inputs_dict, model_class))
                self.assertEqual(config.output_hidden_states, False)
                check_decoder_attentions_output(outputs)

            # Check that output attentions can also be changed via the config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            outputs = model(self._prepare_for_class(inputs_dict, model_class))
            self.assertEqual(config.output_hidden_states, False)
            check_encoder_attentions_output(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            config.output_hidden_states = True
            model = model_class(config)
            outputs = model(self._prepare_for_class(inputs_dict, model_class))

            self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1),
                             len(outputs))
            self.assertEqual(model.config.output_hidden_states, True)
            check_encoder_attentions_output(outputs)

    def test_xla_mode(self):
        # TODO JP: Make LED XLA compliant
        pass

    def test_saved_model_creation(self):
        # This test is too long (>30s) and makes the CI fail
        pass
Example #27
0
class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):

    is_encoder_decoder = True
    all_model_classes = (
        TFT5Model, TFT5ForConditionalGeneration) if is_tf_available() else ()
    all_generative_model_classes = (
        TFT5ForConditionalGeneration, ) if is_tf_available() else ()
    test_onnx = False

    def setUp(self):
        self.model_tester = TFT5ModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=T5Config,
                                          d_model=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_t5_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_model(*config_and_inputs)

    def test_t5_model_v1_1(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        config = config_and_inputs[0]
        config.tie_word_embeddings = False
        config.feed_forward_proj = "gated-gelu"
        self.model_tester.create_and_check_t5_model(config,
                                                    *config_and_inputs[1:])

    def test_with_lm_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_with_lm_head(*config_and_inputs)

    def test_t5_decoder_model_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_decoder_model_past(
            *config_and_inputs)

    def test_t5_decoder_model_past_with_attn_mask(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_decoder_model_attention_mask_past(
            *config_and_inputs)

    def test_t5_decoder_model_past_large_inputs(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_decoder_model_past_large_inputs(
            *config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(),
                              tf.keras.layers.Layer)

            if model_class in self.all_generative_model_classes:
                x = model.get_output_embeddings()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_bias()
                assert name is None
            else:
                x = model.get_output_embeddings()
                assert x is None
                name = model.get_bias()
                assert name is None

    def test_saved_model_creation(self):
        # This test is too long (>30s) and makes the CI fail
        pass

    def test_mixed_precision(self):
        # TODO JP: Make T5 float16 compliant
        pass

    def test_xla_mode(self):
        # TODO JP: Make T5 XLA compliant
        pass

    @slow
    def test_model_from_pretrained(self):
        model = TFT5Model.from_pretrained("t5-small")
        self.assertIsNotNone(model)
Example #28
0
class TestTFMarianCommon(TFModelTesterMixin, unittest.TestCase):
    all_model_classes = (TFMarianMTModel, ) if is_tf_available() else ()
    all_generative_model_classes = (
        TFMarianMTModel, ) if is_tf_available() else ()
    model_tester_cls = ModelTester
    is_encoder_decoder = True
    test_pruning = False

    def setUp(self):
        self.model_tester = self.model_tester_cls(self)
        self.config_tester = ConfigTester(self, config_class=MarianConfig)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_inputs_embeds(self):
        # inputs_embeds not supported
        pass

    def test_saved_model_with_hidden_states_output(self):
        # Should be re-enabled during Patrick's TF refactor
        pass

    def test_saved_model_with_attentions_output(self):
        pass

    def test_compile_tf_model(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5,
                                             epsilon=1e-08,
                                             clipnorm=1.0)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")

        model_class = self.all_generative_model_classes[0]
        input_ids = {
            "decoder_input_ids":
            tf.keras.Input(batch_shape=(2, 2000),
                           name="decoder_input_ids",
                           dtype="int32"),
            "input_ids":
            tf.keras.Input(batch_shape=(2, 2000),
                           name="input_ids",
                           dtype="int32"),
        }

        # Prepare our model
        model = model_class(config)
        model(self._prepare_for_class(
            inputs_dict, model_class))  # Model must be called before saving.
        # Let's load it from the disk to be sure we can use pre-trained weights
        with tempfile.TemporaryDirectory() as tmpdirname:
            model.save_pretrained(tmpdirname)
            model = model_class.from_pretrained(tmpdirname)

        outputs_dict = model(input_ids)
        hidden_states = outputs_dict[0]

        # Add a dense layer on top to test integration with other keras modules
        outputs = tf.keras.layers.Dense(2,
                                        activation="softmax",
                                        name="outputs")(hidden_states)

        # Compile extended model
        extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
        extended_model.compile(optimizer=optimizer,
                               loss=loss,
                               metrics=[metric])

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(),
                              tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None
Example #29
0
class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):

    all_model_classes = (TFDistilBertModel, TFDistilBertForMaskedLM, TFDistilBertForQuestionAnswering,
                         TFDistilBertForSequenceClassification) if is_tf_available() else ()
    test_pruning = True
    test_torchscript = True
    test_resize_embeddings = True
    test_head_masking = True

    class TFDistilBertModelTester(object):

        def __init__(self,
                     parent,
                     batch_size=13,
                     seq_length=7,
                     is_training=True,
                     use_input_mask=True,
                     use_token_type_ids=False,
                     use_labels=True,
                     vocab_size=99,
                     hidden_size=32,
                     num_hidden_layers=5,
                     num_attention_heads=4,
                     intermediate_size=37,
                     hidden_act="gelu",
                     hidden_dropout_prob=0.1,
                     attention_probs_dropout_prob=0.1,
                     max_position_embeddings=512,
                     type_vocab_size=16,
                     type_sequence_label_size=2,
                     initializer_range=0.02,
                     num_labels=3,
                     num_choices=4,
                     scope=None,
                    ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
                choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = DistilBertConfig(
                vocab_size=self.vocab_size,
                dim=self.hidden_size,
                n_layers=self.num_hidden_layers,
                n_heads=self.num_attention_heads,
                hidden_dim=self.intermediate_size,
                hidden_act=self.hidden_act,
                dropout=self.hidden_dropout_prob,
                attention_dropout=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                initializer_range=self.initializer_range)

            return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

        def create_and_check_distilbert_model(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels):
            model = TFDistilBertModel(config=config)
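            # Call the model with both dict and list inputs to exercise the supported input formats.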
            inputs = {'input_ids': input_ids,
                      'attention_mask': input_mask}

            outputs = model(inputs)
            sequence_output = outputs[0]

            inputs = [input_ids, input_mask]

            (sequence_output,) = model(inputs)

            result = {
                "sequence_output": sequence_output.numpy(),
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].shape),
                [self.batch_size, self.seq_length, self.hidden_size])

        def create_and_check_distilbert_for_masked_lm(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels):
            model = TFDistilBertForMaskedLM(config=config)
            inputs = {'input_ids': input_ids,
                      'attention_mask': input_mask}
            (prediction_scores,) = model(inputs)
            result = {
                "prediction_scores": prediction_scores.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape),
                [self.batch_size, self.seq_length, self.vocab_size])

        def create_and_check_distilbert_for_question_answering(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels):
            model = TFDistilBertForQuestionAnswering(config=config)
            inputs = {'input_ids': input_ids,
                      'attention_mask': input_mask}
            start_logits, end_logits = model(inputs)
            result = {
                "start_logits": start_logits.numpy(),
                "end_logits": end_logits.numpy(),
            }
            self.parent.assertListEqual(
                list(result["start_logits"].shape),
                [self.batch_size, self.seq_length])
            self.parent.assertListEqual(
                list(result["end_logits"].shape),
                [self.batch_size, self.seq_length])

        def create_and_check_distilbert_for_sequence_classification(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels):
            config.num_labels = self.num_labels
            model = TFDistilBertForSequenceClassification(config)
            inputs = {'input_ids': input_ids,
                      'attention_mask': input_mask}
            (logits,) = model(inputs)
            result = {
                "logits": logits.numpy(),
            }
            self.parent.assertListEqual(
                list(result["logits"].shape),
                [self.batch_size, self.num_labels])

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs
            inputs_dict = {'input_ids': input_ids, 'attention_mask': input_mask}
            return config, inputs_dict

    def setUp(self):
        self.model_tester = TFDistilBertModelTest.TFDistilBertModelTester(self)
        self.config_tester = ConfigTester(self, config_class=DistilBertConfig, dim=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_distilbert_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_distilbert_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_distilbert_for_masked_lm(*config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_distilbert_for_question_answering(*config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_distilbert_for_sequence_classification(*config_and_inputs)
Example #30
0
class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        TFElectraModel,
        TFElectraForMaskedLM,
        TFElectraForPreTraining,
        TFElectraForTokenClassification,
        TFElectraForMultipleChoice,
        TFElectraForSequenceClassification,
        TFElectraForQuestionAnswering,
    ) if is_tf_available() else ())

    def setUp(self):
        self.model_tester = TFElectraModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=ElectraConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_electra_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_for_masked_lm(
            *config_and_inputs)

    def test_for_pretraining(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_for_pretraining(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_for_question_answering(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_for_sequence_classification(
            *config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_for_multiple_choice(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_for_token_classification(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        # for model_name in TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
        for model_name in ["google/electra-small-discriminator"]:
            model = TFElectraModel.from_pretrained(model_name)
            self.assertIsNotNone(model)