Example #1
def _get_bert_config_or_encoder_config(use_bert_config,
                                       hidden_size,
                                       num_hidden_layers,
                                       vocab_size=100):
    """Returns config args for export_tfhub_lib._create_model()."""
    if use_bert_config:
        bert_config = configs.BertConfig(vocab_size=vocab_size,
                                         hidden_size=hidden_size,
                                         intermediate_size=32,
                                         max_position_embeddings=128,
                                         num_attention_heads=2,
                                         num_hidden_layers=num_hidden_layers)
        encoder_config = None
    else:
        bert_config = None
        encoder_config = encoders.EncoderConfig(
            type="albert",
            albert=encoders.AlbertEncoderConfig(vocab_size=vocab_size,
                                                embedding_width=16,
                                                hidden_size=hidden_size,
                                                intermediate_size=32,
                                                max_position_embeddings=128,
                                                num_attention_heads=2,
                                                num_layers=num_hidden_layers,
                                                dropout_rate=0.1))

    return bert_config, encoder_config
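The returned pair maps directly onto the keyword arguments of the exporter used in the next example. A minimal usage sketch, assuming export_tfhub_lib from the TF Model Garden is importable as in the snippets below, and that a checkpoint and vocab file already exist (the paths are hypothetical placeholders):

import os
import tempfile

# Minimal sketch only: build one of the two configs and forward it to the
# exporter. The checkpoint and vocab paths are hypothetical placeholders.
bert_config, encoder_config = _get_bert_config_or_encoder_config(
    use_bert_config=False, hidden_size=16, num_hidden_layers=1)

export_tfhub_lib.export_model(
    os.path.join(tempfile.mkdtemp(), "hub"),
    bert_config=bert_config,
    encoder_config=encoder_config,
    model_checkpoint_path="/path/to/checkpoint",  # hypothetical
    vocab_file="/path/to/vocab.txt",              # hypothetical
    do_lower_case=True,
    with_mlm=False)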
Example #2
    def _export_bert_tfhub(self):
        bert_config = configs.BertConfig(vocab_size=30522,
                                         hidden_size=16,
                                         intermediate_size=32,
                                         max_position_embeddings=128,
                                         num_attention_heads=2,
                                         num_hidden_layers=4)
        encoder = export_tfhub_lib.get_bert_encoder(bert_config)
        model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint")

        checkpoint = tf.train.Checkpoint(encoder=encoder)
        checkpoint.save(os.path.join(model_checkpoint_dir, "test"))
        model_checkpoint_path = tf.train.latest_checkpoint(
            model_checkpoint_dir)

        vocab_file = os.path.join(self.get_temp_dir(), "uncased_vocab.txt")
        with tf.io.gfile.GFile(vocab_file, "w") as f:
            f.write("dummy content")

        export_path = os.path.join(self.get_temp_dir(), "hub")
        export_tfhub_lib.export_model(
            export_path,
            bert_config=bert_config,
            encoder_config=None,
            model_checkpoint_path=model_checkpoint_path,
            vocab_file=vocab_file,
            do_lower_case=True,
            with_mlm=False)
        return export_path
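The directory returned by this helper is a SavedModel that can be restored as a Keras layer, as the later examples do. A minimal sketch, assuming tensorflow_hub is available and the helper is called from within the same test class:

import tensorflow_hub as hub

# Minimal sketch only: restore the export produced above as a hub layer.
export_path = self._export_bert_tfhub()
hub_layer = hub.KerasLayer(export_path, trainable=True)
print(len(hub_layer.trainable_weights))  # non-empty for a trainable export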
Example #3
    def setUp(self):
        super(BertModelsTest, self).setUp()
        self._bert_test_config = bert_configs.BertConfig(
            attention_probs_dropout_prob=0.0,
            hidden_act='gelu',
            hidden_dropout_prob=0.0,
            hidden_size=16,
            initializer_range=0.02,
            intermediate_size=32,
            max_position_embeddings=128,
            num_attention_heads=2,
            num_hidden_layers=2,
            type_vocab_size=2,
            vocab_size=30522)
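A test in the same class can then read the shared fixture. A hypothetical sketch (this method is illustrative and not part of the original file):

    def test_config_fields(self):
        # Illustrative only: the fixture above exposes a small BertConfig.
        config = self._bert_test_config
        self.assertEqual(config.hidden_size, 16)
        self.assertEqual(config.num_hidden_layers, 2)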
Example #4
    def test_export_model(self):
        # Exports a SavedModel for TF Hub.
        hidden_size = 16
        bert_config = configs.BertConfig(vocab_size=100,
                                         hidden_size=hidden_size,
                                         intermediate_size=32,
                                         max_position_embeddings=128,
                                         num_attention_heads=2,
                                         num_hidden_layers=1)
        labse_model, encoder = export_tfhub.create_labse_model(None,
                                                               bert_config,
                                                               normalize=True)
        model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint")
        checkpoint = tf.train.Checkpoint(encoder=encoder)
        checkpoint.save(os.path.join(model_checkpoint_dir, "test"))
        model_checkpoint_path = tf.train.latest_checkpoint(
            model_checkpoint_dir)

        vocab_file = os.path.join(self.get_temp_dir(), "uncased_vocab.txt")
        with tf.io.gfile.GFile(vocab_file, "w") as f:
            f.write("dummy content")

        hub_destination = os.path.join(self.get_temp_dir(), "hub")
        export_tfhub.export_labse_model(
            None,  # bert_tfhub_module
            bert_config,
            model_checkpoint_path,
            hub_destination,
            vocab_file,
            do_lower_case=True,
            normalize=True)

        # Restores a hub KerasLayer.
        hub_layer = hub.KerasLayer(hub_destination, trainable=True)

        if hasattr(hub_layer, "resolved_object"):
            # Checks meta attributes.
            self.assertTrue(hub_layer.resolved_object.do_lower_case.numpy())
            with tf.io.gfile.GFile(
                    hub_layer.resolved_object.vocab_file.asset_path.numpy()) as f:
                self.assertEqual("dummy content", f.read())
        # Checks the hub KerasLayer.
        for source_weight, hub_weight in zip(labse_model.trainable_weights,
                                             hub_layer.trainable_weights):
            self.assertAllClose(source_weight.numpy(), hub_weight.numpy())

        seq_length = 10
        dummy_ids = np.zeros((2, seq_length), dtype=np.int32)
        hub_outputs = hub_layer([dummy_ids, dummy_ids, dummy_ids])
        source_outputs = labse_model([dummy_ids, dummy_ids, dummy_ids])

        self.assertEqual(hub_outputs["pooled_output"].shape, (2, hidden_size))
        self.assertEqual(hub_outputs["sequence_output"].shape,
                         (2, seq_length, hidden_size))
        for output_name in source_outputs:
            self.assertAllClose(source_outputs[output_name].numpy(),
                                hub_outputs[output_name].numpy())

        # Test that training=True makes a difference (activates dropout).
        def _dropout_mean_stddev(training, num_runs=20):
            input_ids = np.array([[14, 12, 42, 95, 99]], np.int32)
            inputs = [
                input_ids,
                np.ones_like(input_ids),
                np.zeros_like(input_ids)
            ]
            outputs = np.concatenate([
                hub_layer(inputs, training=training)["pooled_output"]
                for _ in range(num_runs)
            ])
            return np.mean(np.std(outputs, axis=0))

        self.assertLess(_dropout_mean_stddev(training=False), 1e-6)
        self.assertGreater(_dropout_mean_stddev(training=True), 1e-3)

        # Test propagation of seq_length in shape inference.
        input_word_ids = tf.keras.layers.Input(shape=(seq_length, ),
                                               dtype=tf.int32)
        input_mask = tf.keras.layers.Input(shape=(seq_length, ),
                                           dtype=tf.int32)
        input_type_ids = tf.keras.layers.Input(shape=(seq_length, ),
                                               dtype=tf.int32)
        outputs = hub_layer([input_word_ids, input_mask, input_type_ids])
        self.assertEqual(outputs["pooled_output"].shape.as_list(),
                         [None, hidden_size])
        self.assertEqual(outputs["sequence_output"].shape.as_list(),
                         [None, seq_length, hidden_size])
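The shape-inference check above also provides everything needed to wrap the restored layer into a fine-tunable classifier. A minimal sketch using standard Keras APIs, continuing from the symbolic inputs and outputs built at the end of the test (the dense head and class count are illustrative additions, not part of the original test):

import tensorflow as tf

# Illustrative only: reuse the Input tensors and hub_layer outputs above.
num_classes = 3  # hypothetical label count
logits = tf.keras.layers.Dense(num_classes)(outputs["pooled_output"])
classifier = tf.keras.Model(
    inputs=[input_word_ids, input_mask, input_type_ids], outputs=logits)
classifier.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))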
Example #5
    def test_forward_pass(self,
                          use_pointing=False,
                          query_transformer=False,
                          is_training=True):
        """Randomly generate and run different configuarations for Felix Tagger."""
        # Ensures reproducibility.

        # Setup.
        sequence_length = 7
        vocab_size = 11
        bert_hidden_size = 13
        bert_num_hidden_layers = 1
        bert_num_attention_heads = 1
        bert_intermediate_size = 4
        bert_type_vocab_size = 2
        bert_max_position_embeddings = sequence_length
        bert_encoder = networks.BertEncoder(
            vocab_size=vocab_size,
            hidden_size=bert_hidden_size,
            num_layers=bert_num_hidden_layers,
            num_attention_heads=bert_num_attention_heads,
            intermediate_size=bert_intermediate_size,
            sequence_length=sequence_length,
            max_sequence_length=bert_max_position_embeddings,
            type_vocab_size=bert_type_vocab_size)
        bert_config = configs.BertConfig(
            vocab_size,
            hidden_size=bert_hidden_size,
            num_hidden_layers=bert_num_hidden_layers,
            num_attention_heads=bert_num_attention_heads,
            intermediate_size=bert_intermediate_size,
            type_vocab_size=bert_type_vocab_size,
            max_position_embeddings=bert_max_position_embeddings)
        batch_size = 17
        edit_tags_size = 19
        bert_config.num_classes = edit_tags_size
        bert_config.query_size = 23
        bert_config.query_transformer = query_transformer

        tagger = felix_tagger.FelixTagger(bert_encoder,
                                          bert_config=bert_config,
                                          seq_length=sequence_length,
                                          use_pointing=use_pointing,
                                          is_training=is_training)

        # Create inputs.
        np.random.seed(42)
        input_word_ids = np.random.randint(vocab_size - 1,
                                           size=(batch_size, sequence_length))
        input_mask = np.random.randint(1, size=(batch_size, sequence_length))
        input_type_ids = np.ones((batch_size, sequence_length))
        edit_tags = np.random.randint(edit_tags_size - 2,
                                      size=(batch_size, sequence_length))

        # Run the model.
        if is_training:
            output = tagger(
                [input_word_ids, input_type_ids, input_mask, edit_tags])
        else:
            output = tagger([input_word_ids, input_type_ids, input_mask])

        # Check output shapes.
        if use_pointing:
            tag_logits, pointing_logits = output
            self.assertEqual(pointing_logits.shape,
                             (batch_size, sequence_length, sequence_length))
        else:
            tag_logits = output[0]
        self.assertEqual(tag_logits.shape,
                         (batch_size, sequence_length, edit_tags_size))
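Beyond the shape checks, the tag logits can be reduced to per-token predictions in the usual way. A short illustrative sketch, continuing from the variables defined in the test above (not part of the original test):

import tensorflow as tf

# Illustrative only: greedy per-token tag predictions from the logits above.
predicted_tags = tf.argmax(tag_logits, axis=-1)
assert predicted_tags.shape == (batch_size, sequence_length)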