Example #1
class MaskLM(tf.keras.Model):
    """Multimodal MLM top layer."""

    def __init__(self, params: BaseParams, problem_name: str, input_embeddings: tf.keras.layers.Layer = None, share_embedding: bool = True) -> None:
        super(MaskLM, self).__init__(name=problem_name)
        self.params = params
        self.problem_name = problem_name

        if share_embedding is False:
            self.vocab_size = self.params.bert_config.vocab_size
            self.share_embedding = False
        else:
            self.vocab_size = input_embeddings.shape[0]
            embedding_size = input_embeddings.shape[-1]
            share_valid = (self.params.bert_config.hidden_size ==
                           embedding_size)
            if not share_valid and self.params.share_embedding:
                logger.warning(
                    'Share embedding is enabled but hidden_size != embedding_size')
            self.share_embedding = self.params.share_embedding and share_valid

        if self.share_embedding:
            self.share_embedding_layer = TFSharedEmbeddings(
                vocab_size=self.vocab_size, hidden_size=input_embeddings.shape[1])
            self.share_embedding_layer.build([1])
            self.share_embedding_layer.weight = input_embeddings
        else:
            self.share_embedding_layer = tf.keras.layers.Dense(self.vocab_size)
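A minimal construction sketch for this head, using stub stand-ins for `BaseParams` and its `bert_config` (the real objects come from the surrounding library); every name below is illustrative, not part of the original source.

import tensorflow as tf

# Illustrative stand-ins only; the real BaseParams/bert_config are supplied
# by the surrounding library.
class _BertConfig:
    vocab_size = 30522
    hidden_size = 768

class _Params:
    bert_config = _BertConfig()
    share_embedding = True

# A (vocab_size, hidden_size) embedding table, e.g. lifted from a BERT encoder.
embedding_table = tf.Variable(tf.random.normal([30522, 768]))
mlm_head = MaskLM(_Params(), problem_name='masked_lm',
                  input_embeddings=embedding_table)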
Example #2
    def test_keras_save_load(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        # remove `return_loss` to make code work
        if self.__class__.__name__ == "TFCLIPModelTest":
            inputs_dict.pop("return_loss", None)

        tf_main_layer_classes = set(
            module_member for model_class in self.all_model_classes
            for module in (import_module(model_class.__module__), )
            for module_member_name in dir(module)
            if module_member_name.endswith("MainLayer")
            # This condition is required, since `modeling_tf_clip.py` has 3 classes whose names end with `MainLayer`.
            and module_member_name[:-len("MainLayer")] ==
            model_class.__name__[:-len("Model")]
            for module_member in (getattr(module, module_member_name), )
            if isinstance(module_member, type)
            and tf.keras.layers.Layer in module_member.__bases__
            and getattr(module_member, "_keras_serializable", False))
        for main_layer_class in tf_main_layer_classes:
            # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
            if "T5" in main_layer_class.__name__:
                # Use the same values as in TFT5ModelTester for this shared layer
                shared = TFSharedEmbeddings(99, 32, name="shared")
                config.use_cache = inputs_dict.pop("use_cache", None)
                main_layer = main_layer_class(config, embed_tokens=shared)
            else:
                main_layer = main_layer_class(config)

            symbolic_inputs = {
                name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype)
                for name, tensor in inputs_dict.items()
            }

            model = tf.keras.Model(symbolic_inputs,
                                   outputs=main_layer(symbolic_inputs))
            outputs = model(inputs_dict)

            with tempfile.TemporaryDirectory() as tmpdirname:
                filepath = os.path.join(tmpdirname, "keras_model.h5")
                model.save(filepath)
                if "T5" in main_layer_class.__name__:
                    model = tf.keras.models.load_model(
                        filepath,
                        custom_objects={
                            main_layer_class.__name__: main_layer_class,
                            "TFSharedEmbeddings": TFSharedEmbeddings,
                        },
                    )
                else:
                    model = tf.keras.models.load_model(
                        filepath,
                        custom_objects={
                            main_layer_class.__name__: main_layer_class
                        })
                assert isinstance(model, tf.keras.Model)
                after_outputs = model(inputs_dict)
                self.assert_outputs_same(after_outputs, outputs)
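The pattern this test exercises, reduced to a self-contained sketch: a Keras model containing a custom layer round-trips through save/load only when the layer class is registered via `custom_objects`. `ToyMainLayer` is an illustrative stand-in, not a transformers class.

import os
import tempfile
import tensorflow as tf

class ToyMainLayer(tf.keras.layers.Layer):
    def __init__(self, units=4, **kwargs):
        super().__init__(**kwargs)
        self.dense = tf.keras.layers.Dense(units)

    def call(self, inputs):
        return self.dense(inputs)

    def get_config(self):
        # Serializing `units` is what makes the layer reloadable.
        return {**super().get_config(), "units": self.dense.units}

inputs = tf.keras.Input(shape=(8,))
model = tf.keras.Model(inputs, ToyMainLayer()(inputs))

with tempfile.TemporaryDirectory() as tmpdir:
    filepath = os.path.join(tmpdir, "keras_model.h5")
    model.save(filepath)
    restored = tf.keras.models.load_model(
        filepath, custom_objects={"ToyMainLayer": ToyMainLayer})
    assert isinstance(restored, tf.keras.Model)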
Example #3
    def test_keras_save_load(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        tf_main_layer_classes = set(
            module_member for model_class in self.all_model_classes
            for module in (import_module(model_class.__module__), )
            for module_member_name in dir(module)
            if module_member_name.endswith("MainLayer")
            for module_member in (getattr(module, module_member_name), )
            if isinstance(module_member, type)
            and tf.keras.layers.Layer in module_member.__bases__
            and getattr(module_member, "_keras_serializable", False))
        for main_layer_class in tf_main_layer_classes:
            # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
            if "T5" in main_layer_class.__name__:
                # Use the same values as in TFT5ModelTester for this shared layer
                shared = TFSharedEmbeddings(99, 32, name="shared")
                main_layer = main_layer_class(config, embed_tokens=shared)
            else:
                main_layer = main_layer_class(config)
            symbolic_inputs = {
                name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype)
                for name, tensor in inputs_dict.items()
            }

            model = tf.keras.Model(symbolic_inputs,
                                   outputs=main_layer(symbolic_inputs))
            outputs = model(inputs_dict)

            with tempfile.TemporaryDirectory() as tmpdirname:
                filepath = os.path.join(tmpdirname, "keras_model.h5")
                model.save(filepath)
                if "T5" in main_layer_class.__name__:
                    model = tf.keras.models.load_model(
                        filepath,
                        custom_objects={
                            main_layer_class.__name__: main_layer_class,
                            "TFSharedEmbeddings": TFSharedEmbeddings,
                        },
                    )
                else:
                    model = tf.keras.models.load_model(
                        filepath,
                        custom_objects={
                            main_layer_class.__name__: main_layer_class
                        })
                assert isinstance(model, tf.keras.Model)
                after_outputs = model(inputs_dict)
                self.assert_outputs_same(after_outputs, outputs)
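One detail worth isolating: both tests rebuild symbolic Keras inputs from concrete tensors by dropping the batch dimension. A minimal sketch, with a made-up `inputs_dict` in place of the one the model tester provides:

import tensorflow as tf

# Made-up concrete inputs; the real tests get these from the model tester.
inputs_dict = {"input_ids": tf.ones((2, 5), dtype=tf.int32)}

# tensor.shape[1:] drops the batch dimension, which tf.keras.Input adds back.
symbolic_inputs = {
    name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype)
    for name, tensor in inputs_dict.items()
}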
Example #4
    def test_train_pipeline_custom_model(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        # head_mask and decoder_head_mask have different shapes than the other input args
        if "head_mask" in inputs_dict:
            del inputs_dict["head_mask"]
        if "decoder_head_mask" in inputs_dict:
            del inputs_dict["decoder_head_mask"]
        if "cross_attn_head_mask" in inputs_dict:
            del inputs_dict["cross_attn_head_mask"]
        tf_main_layer_classes = set(
            module_member for model_class in self.all_model_classes
            for module in (import_module(model_class.__module__), )
            for module_member_name in dir(module)
            if module_member_name.endswith("MainLayer")
            for module_member in (getattr(module, module_member_name), )
            if isinstance(module_member, type)
            and tf.keras.layers.Layer in module_member.__bases__
            and getattr(module_member, "_keras_serializable", False))

        for main_layer_class in tf_main_layer_classes:
            # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
            if "T5" in main_layer_class.__name__:
                # Use the same values as in TFT5ModelTester for this shared layer
                shared = TFSharedEmbeddings(self.model_tester.vocab_size,
                                            self.model_tester.hidden_size,
                                            name="shared")
                config.use_cache = False
                main_layer = main_layer_class(config, embed_tokens=shared)
            else:
                main_layer = main_layer_class(config)

            symbolic_inputs = {
                name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype)
                for name, tensor in inputs_dict.items()
            }

            if hasattr(self.model_tester, "num_labels"):
                num_labels = self.model_tester.num_labels
            else:
                num_labels = 2

            X = tf.data.Dataset.from_tensor_slices(
                (inputs_dict,
                 np.ones(
                     (self.model_tester.batch_size,
                      self.model_tester.seq_length, num_labels, 1)))).batch(1)

            hidden_states = main_layer(symbolic_inputs)[0]
            outputs = tf.keras.layers.Dense(num_labels,
                                            activation="softmax",
                                            name="outputs")(hidden_states)
            model = tf.keras.models.Model(inputs=symbolic_inputs,
                                          outputs=[outputs])

            model.compile(loss="binary_crossentropy",
                          optimizer="adam",
                          metrics=["binary_accuracy"])
            model.fit(X, epochs=1)

            with tempfile.TemporaryDirectory() as tmpdirname:
                filepath = os.path.join(tmpdirname, "keras_model.h5")
                model.save(filepath)
                if "T5" in main_layer_class.__name__:
                    model = tf.keras.models.load_model(
                        filepath,
                        custom_objects={
                            main_layer_class.__name__: main_layer_class,
                            "TFSharedEmbeddings": TFSharedEmbeddings,
                        },
                    )
                else:
                    model = tf.keras.models.load_model(
                        filepath,
                        custom_objects={
                            main_layer_class.__name__: main_layer_class
                        })
                assert isinstance(model, tf.keras.Model)
                model(inputs_dict)
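The training half of the test, reduced to a runnable sketch with an ordinary embedding stack standing in for a transformer main layer; shapes and names are illustrative, not taken from the test suite.

import numpy as np
import tensorflow as tf

# Toy features/labels in place of the model tester's inputs.
features = {"input_ids": np.ones((4, 7), dtype=np.int32)}
labels = np.ones((4, 7, 2), dtype=np.float32)

symbolic_inputs = {name: tf.keras.Input(t.shape[1:], dtype=t.dtype)
                   for name, t in features.items()}
hidden_states = tf.keras.layers.Embedding(10, 16)(symbolic_inputs["input_ids"])
outputs = tf.keras.layers.Dense(2, activation="softmax")(hidden_states)
model = tf.keras.Model(inputs=symbolic_inputs, outputs=outputs)

# Same fit-on-tf.data pattern as above: (features_dict, labels) slices.
dataset = tf.data.Dataset.from_tensor_slices((features, labels)).batch(1)
model.compile(loss="binary_crossentropy", optimizer="adam",
              metrics=["binary_accuracy"])
model.fit(dataset, epochs=1)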
Example #5
class MaskLM(tf.keras.Model):
    """Multimodal MLM top layer.
    """

    def __init__(self, params: BaseParams, problem_name: str, input_embeddings: tf.keras.layers.Layer = None, share_embedding: bool = True) -> None:
        super(MaskLM, self).__init__(name=problem_name)
        self.params = params
        self.problem_name = problem_name

        if share_embedding is False:
            self.vocab_size = self.params.bert_config.vocab_size
            self.share_embedding = False
        else:
            self.vocab_size = input_embeddings.shape[0]
            embedding_size = input_embeddings.shape[-1]
            share_valid = (self.params.bert_config.hidden_size ==
                           embedding_size)
            if not share_valid and self.params.share_embedding:
                logger.warning(
                    'Share embedding is enabled but hidden_size != embedding_size')
            self.share_embedding = self.params.share_embedding and share_valid

        if self.share_embedding:
            self.share_embedding_layer = TFSharedEmbeddings(
                vocab_size=self.vocab_size, hidden_size=input_embeddings.shape[1])
            self.share_embedding_layer.build([1])
            self.share_embedding_layer.weight = input_embeddings
        else:
            self.share_embedding_layer = tf.keras.layers.Dense(self.vocab_size)

    def call(self, inputs):
        mode = get_phase()
        features, hidden_features = inputs

        # masking is done inside the model
        seq_hidden_feature = hidden_features['seq']
        if mode != PREDICT:
            positions = features['masked_lm_positions']

            # gather_indexes will flatten the seq hidden_states, we need to reshape
            # back to 3d tensor
            input_tensor = gather_indexes(seq_hidden_feature, positions)
            shape_tensor = tf.shape(positions)
            shape_list = tf.concat([shape_tensor, [seq_hidden_feature.shape.as_list()[-1]]], axis=0)
            input_tensor = tf.reshape(input_tensor, shape=shape_list)
            # set_shape to determine the rank
            input_tensor.set_shape(
                [None, None, seq_hidden_feature.shape.as_list()[-1]])
        else:
            input_tensor = seq_hidden_feature
        if self.share_embedding:
            mlm_logits = self.share_embedding_layer(
                input_tensor, mode='linear')
        else:
            mlm_logits = self.share_embedding_layer(input_tensor)
        if mode != PREDICT:
            mlm_labels = features['masked_lm_ids']
            mlm_labels.set_shape([None, None])
            mlm_labels = pad_to_shape(from_tensor=mlm_labels, to_tensor=mlm_logits, axis=1)
            # compute loss
            mlm_loss = empty_tensor_handling_loss(
                mlm_labels,
                mlm_logits,
                tf.keras.losses.sparse_categorical_crossentropy
            )
            loss = nan_loss_handling(mlm_loss)
            self.add_loss(loss)

        return tf.nn.softmax(mlm_logits)
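The weight-tying trick in `call`, isolated: a single `TFSharedEmbeddings` matrix maps token ids to vectors in embedding mode and hidden states back to vocabulary logits in `mode='linear'`. A minimal sketch, assuming a transformers version that still ships `TFSharedEmbeddings` (its import path has moved across releases).

import tensorflow as tf
from transformers.modeling_tf_utils import TFSharedEmbeddings

shared = TFSharedEmbeddings(vocab_size=100, hidden_size=16)
token_ids = tf.constant([[1, 2, 3]])
vectors = shared(token_ids)               # (1, 3, 16) embeddings
logits = shared(vectors, mode="linear")   # (1, 3, 100) vocabulary logits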