Code example #1
  def setUp(self):
    super(FelixModelsTest, self).setUp()
    self._bert_test_config = bert_configs.BertConfig(
        attention_probs_dropout_prob=0.0,
        hidden_act='gelu',
        hidden_dropout_prob=0.0,
        hidden_size=16,
        initializer_range=0.02,
        intermediate_size=32,
        max_position_embeddings=128,
        num_attention_heads=2,
        num_hidden_layers=2,
        type_vocab_size=2,
        vocab_size=30522)
    self._bert_test_config.num_classes = 20
    self._bert_test_config.query_size = 23
    self._bert_test_config.query_transformer = True
Code example #2
    def setUp(self):
        super().setUp()
        self.random_seed = 42

        self.num_classes = 10
        self.batch_size = 4
        self.seq_length = 4
        self.hidden_dim = 8
        self.num_heads = 2
        self.key_dim = self.hidden_dim // self.num_heads

        self.bert_test_config = bert_configs.BertConfig(
            attention_probs_dropout_prob=0.12,
            hidden_dropout_prob=0.34,
            hidden_act='gelu',
            hidden_size=self.hidden_dim,
            initializer_range=0.02,
            intermediate_size=self.hidden_dim,
            max_position_embeddings=self.seq_length,
            num_attention_heads=self.num_heads,
            num_hidden_layers=2,
            type_vocab_size=2,
            vocab_size=128)

        self.input_shape_3d = tf.TensorShape(
            (self.batch_size, self.seq_length, self.hidden_dim))
        self.input_shape_4d = tf.TensorShape(
            (self.batch_size, self.seq_length, self.num_heads, self.key_dim))

        # Layer arguments.
        self.sn_norm_multiplier = 0.05
        self.spec_norm_kwargs = dict(iteration=1000,
                                     norm_multiplier=self.sn_norm_multiplier)
        self.attention_kwargs = dict(num_heads=self.num_heads,
                                     key_dim=self.key_dim)
        self.feedforward_kwargs = dict(intermediate_size=128,
                                       intermediate_activation='gelu',
                                       dropout=0.1,
                                       use_layer_norm=True)
        self.gp_layer_kwargs = dict(num_inducing=32,
                                    gp_cov_momentum=0.999,
                                    gp_cov_ridge_penalty=1e-6)
Code example #3
    def _export_bert_tfhub(self):
        bert_config = configs.BertConfig(vocab_size=30522,
                                         hidden_size=16,
                                         intermediate_size=32,
                                         max_position_embeddings=128,
                                         num_attention_heads=2,
                                         num_hidden_layers=1)
        _, encoder = export_tfhub.create_bert_model(bert_config)
        model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint")
        checkpoint = tf.train.Checkpoint(model=encoder)
        checkpoint.save(os.path.join(model_checkpoint_dir, "test"))
        model_checkpoint_path = tf.train.latest_checkpoint(
            model_checkpoint_dir)

        vocab_file = os.path.join(self.get_temp_dir(), "uncased_vocab.txt")
        with tf.io.gfile.GFile(vocab_file, "w") as f:
            f.write("dummy content")

        hub_destination = os.path.join(self.get_temp_dir(), "hub")
        export_tfhub.export_bert_tfhub(bert_config, model_checkpoint_path,
                                       hub_destination, vocab_file)
        return hub_destination
Code example #4
  def __init__(
      self,
      uri='https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1',
      model_dir=None,
      seq_len=128,
      dropout_rate=0.1,
      initializer_range=0.02,
      learning_rate=3e-5,
      distribution_strategy='mirrored',
      num_gpus=-1,
      tpu='',
      trainable=True,
      do_lower_case=True,
      is_tf2=True,
      convert_from_saved_model_tf2=False):
    """Initialze an instance with model paramaters.

    Args:
      uri: TF-Hub path/url to Bert module.
      model_dir: The location of the model checkpoint files.
      seq_len: Length of the sequence to feed into the model.
      dropout_rate: The rate for dropout.
      initializer_range: The stdev of the truncated_normal_initializer for
        initializing all weight matrices.
      learning_rate: The initial learning rate for Adam.
      distribution_strategy: A string specifying which distribution strategy to
        use. Accepted values are 'off', 'one_device', 'mirrored',
        'parameter_server', 'multi_worker_mirrored', and 'tpu' -- case
        insensitive. 'off' means not to use Distribution Strategy; 'tpu' means
        to use TPUStrategy using `tpu_address`.
      num_gpus: How many GPUs to use at each worker with the
        DistributionStrategies API. The default is -1, which means utilize all
        available GPUs.
      tpu: TPU address to connect to.
      trainable: boolean, whether pretrain layer is trainable.
      do_lower_case: boolean, whether to lower case the input text. Should be
        True for uncased models and False for cased models.
      is_tf2: boolean, whether the hub module is in TensorFlow 2.x format.
      convert_from_saved_model_tf2: Convert to TFLite from saved_model in TF
        2.x.
    """
    if compat.get_tf_behavior() not in self.compat_tf_versions:
      raise ValueError('Incompatible versions. Expect {}, but got {}.'.format(
          self.compat_tf_versions, compat.get_tf_behavior()))
    self.seq_len = seq_len
    self.dropout_rate = dropout_rate
    self.initializer_range = initializer_range
    self.learning_rate = learning_rate
    self.trainable = trainable

    self.model_dir = model_dir
    if self.model_dir is None:
      self.model_dir = tempfile.mkdtemp()

    num_gpus = get_num_gpus(num_gpus)
    self.strategy = distribution_utils.get_distribution_strategy(
        distribution_strategy=distribution_strategy,
        num_gpus=num_gpus,
        tpu_address=tpu)
    self.tpu = tpu
    self.uri = uri
    self.do_lower_case = do_lower_case
    self.is_tf2 = is_tf2

    self.bert_config = bert_configs.BertConfig(
        0,
        initializer_range=self.initializer_range,
        hidden_dropout_prob=self.dropout_rate)

    self.convert_from_saved_model_tf2 = convert_from_saved_model_tf2
    self.is_built = False
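The constructor above calls a get_num_gpus helper; the sketch below is only an assumption about what such a helper might look like, following the docstring's convention that -1 means all available GPUs, and is not the library's actual implementation.

def get_num_gpus(num_gpus):
  # Hypothetical sketch: treat -1 as "use every GPU TensorFlow can see",
  # otherwise pass the requested count through unchanged.
  if num_gpus == -1:
    return len(tf.config.list_physical_devices('GPU'))
  return num_gpus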
Code example #5
    def test_export_tfhub(self):
        # Exports a SavedModel for TF-Hub.
        hidden_size = 16
        bert_config = configs.BertConfig(vocab_size=100,
                                         hidden_size=hidden_size,
                                         intermediate_size=32,
                                         max_position_embeddings=128,
                                         num_attention_heads=2,
                                         num_hidden_layers=1)
        bert_model, encoder = export_tfhub.create_bert_model(bert_config)
        model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint")
        checkpoint = tf.train.Checkpoint(model=encoder)
        checkpoint.save(os.path.join(model_checkpoint_dir, "test"))
        model_checkpoint_path = tf.train.latest_checkpoint(
            model_checkpoint_dir)

        vocab_file = os.path.join(self.get_temp_dir(), "uncased_vocab.txt")
        with tf.io.gfile.GFile(vocab_file, "w") as f:
            f.write("dummy content")

        hub_destination = os.path.join(self.get_temp_dir(), "hub")
        export_tfhub.export_bert_tfhub(bert_config, model_checkpoint_path,
                                       hub_destination, vocab_file)

        # Restores a hub KerasLayer.
        hub_layer = hub.KerasLayer(hub_destination, trainable=True)

        if hasattr(hub_layer, "resolved_object"):
            # Checks meta attributes.
            self.assertTrue(hub_layer.resolved_object.do_lower_case.numpy())
            vocab_file_path = hub_layer.resolved_object.vocab_file.asset_path
            with tf.io.gfile.GFile(vocab_file_path.numpy()) as f:
                self.assertEqual("dummy content", f.read())
        # Checks the hub KerasLayer.
        for source_weight, hub_weight in zip(bert_model.trainable_weights,
                                             hub_layer.trainable_weights):
            self.assertAllClose(source_weight.numpy(), hub_weight.numpy())

        seq_length = 10
        dummy_ids = np.zeros((2, seq_length), dtype=np.int32)
        hub_outputs = hub_layer([dummy_ids, dummy_ids, dummy_ids])
        source_outputs = bert_model([dummy_ids, dummy_ids, dummy_ids])

        # The outputs of the hub module are "pooled_output" and
        # "sequence_output", while the encoder's outputs come in the reverse
        # order, i.e., "sequence_output" and "pooled_output".
        encoder_outputs = reversed(encoder([dummy_ids, dummy_ids, dummy_ids]))
        self.assertEqual(hub_outputs[0].shape, (2, hidden_size))
        self.assertEqual(hub_outputs[1].shape, (2, seq_length, hidden_size))
        for source_output, hub_output, encoder_output in zip(
                source_outputs, hub_outputs, encoder_outputs):
            self.assertAllClose(source_output.numpy(), hub_output.numpy())
            self.assertAllClose(source_output.numpy(), encoder_output.numpy())

        # Test that training=True makes a difference (activates dropout).
        def _dropout_mean_stddev(training, num_runs=20):
            input_ids = np.array([[14, 12, 42, 95, 99]], np.int32)
            inputs = [
                input_ids,
                np.ones_like(input_ids),
                np.zeros_like(input_ids)
            ]
            outputs = np.concatenate([
                hub_layer(inputs, training=training)[0]
                for _ in range(num_runs)
            ])
            return np.mean(np.std(outputs, axis=0))

        self.assertLess(_dropout_mean_stddev(training=False), 1e-6)
        self.assertGreater(_dropout_mean_stddev(training=True), 1e-3)

        # Test propagation of seq_length in shape inference.
        input_word_ids = tf.keras.layers.Input(shape=(seq_length, ),
                                               dtype=tf.int32)
        input_mask = tf.keras.layers.Input(shape=(seq_length, ),
                                           dtype=tf.int32)
        input_type_ids = tf.keras.layers.Input(shape=(seq_length, ),
                                               dtype=tf.int32)
        pooled_output, sequence_output = hub_layer(
            [input_word_ids, input_mask, input_type_ids])
        self.assertEqual(pooled_output.shape.as_list(), [None, hidden_size])
        self.assertEqual(sequence_output.shape.as_list(),
                         [None, seq_length, hidden_size])
Code example #6
def create_config(config_dir: str) -> configs.BertConfig:
    """Load a BERT config object from directory."""
    with tf.io.gfile.GFile(config_dir) as config_file:
        bert_config = json.load(config_file)
    return configs.BertConfig(**bert_config)
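A hypothetical usage sketch for create_config; the temporary file name and the config values below are illustrative and not taken from the original code.

import json
import os
import tempfile

import tensorflow as tf

# Write a minimal BERT config to a temporary JSON file (illustrative values).
config_path = os.path.join(tempfile.mkdtemp(), 'bert_config.json')
with tf.io.gfile.GFile(config_path, 'w') as f:
    json.dump(
        {
            'vocab_size': 100,
            'hidden_size': 16,
            'num_hidden_layers': 1,
            'num_attention_heads': 2,
            'intermediate_size': 32,
        }, f)

# create_config expands the JSON dictionary into a configs.BertConfig.
bert_config = create_config(config_path)
assert bert_config.hidden_size == 16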
Code example #7
    def run_classifier(self, train_input_fn, validation_input_fn, epochs,
                       steps_per_epoch, validation_steps, num_classes):
        """Creates classifier and runs the classifier training."""
        if epochs is None:
            epochs = self.default_training_epochs

        bert_config = bert_configs.BertConfig(
            0,
            initializer_range=self.initializer_range,
            hidden_dropout_prob=self.dropout_rate)
        warmup_steps = int(epochs * steps_per_epoch * 0.1)
        initial_lr = self.learning_rate

        def _get_classifier_model():
            """Gets a classifier model."""
            classifier_model, core_model = (bert_models.classifier_model(
                bert_config,
                num_classes,
                self.seq_len,
                hub_module_url=self.uri))
            classifier_model.optimizer = optimization.create_optimizer(
                initial_lr, steps_per_epoch * epochs, warmup_steps)
            return classifier_model, core_model

        # During distributed training, the loss used for gradient computation
        # is summed over all replicas. When the Keras compile/fit() API is
        # used, fit() internally normalizes the loss by dividing it by the
        # number of replicas used for computation. However, when a custom
        # training loop is used, this is not done automatically and must be
        # done manually by the end user.
        loss_multiplier = 1.0
        if self.scale_loss:
            loss_multiplier = 1.0 / self.strategy.num_replicas_in_sync

        loss_fn = self.get_classification_loss_fn(num_classes,
                                                  loss_factor=loss_multiplier)

        # Defines evaluation metrics function, which will create metrics in the
        # correct device and strategy scope.
        def metric_fn():
            return tf.keras.metrics.SparseCategoricalAccuracy('test_accuracy',
                                                              dtype=tf.float32)

        # Use user-defined loop to start training.
        tf.compat.v1.logging.info(
            'Training using customized training loop TF 2.0 '
            'with distribution strategy.')
        bert_model = model_training_utils.run_customized_training_loop(
            strategy=self.strategy,
            model_fn=_get_classifier_model,
            loss_fn=loss_fn,
            model_dir=self.model_dir,
            steps_per_epoch=steps_per_epoch,
            steps_per_loop=self.steps_per_loop,
            epochs=epochs,
            train_input_fn=train_input_fn,
            eval_input_fn=validation_input_fn,
            eval_steps=validation_steps,
            init_checkpoint=None,
            metric_fn=metric_fn,
            custom_callbacks=None,
            run_eagerly=False)

        # Used in evaluation.
        with self.strategy.scope():
            bert_model, _ = _get_classifier_model()
            checkpoint_path = tf.train.latest_checkpoint(self.model_dir)
            checkpoint = tf.train.Checkpoint(model=bert_model)
            checkpoint.restore(checkpoint_path).expect_partial()
            bert_model.compile(loss=loss_fn, metrics=[metric_fn()])
        return bert_model
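The replica-scaling comment in run_classifier can be made concrete with a small sketch; the method below is an illustrative stand-in for get_classification_loss_fn, assuming sparse categorical cross-entropy, and is not the actual implementation.

    def _example_scaled_classification_loss(self, loss_factor=1.0):
        # Illustrative sketch: average sparse categorical cross-entropy over
        # the batch, then scale by loss_factor (1 / num_replicas_in_sync when
        # a custom distributed training loop is used, per the comment above).
        def loss_fn(labels, logits):
            per_example_loss = tf.keras.losses.sparse_categorical_crossentropy(
                labels, logits, from_logits=True)
            return tf.reduce_mean(per_example_loss) * loss_factor
        return loss_fn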
Code example #8
    def test_forward_pass(self,
                          use_pointing=False,
                          query_transformer=False,
                          is_training=True):
        """Randomly generate and run different configuarations for Felix Tagger."""
        # Ensures reproducibility.
        np.random.seed(42)

        # Setup.
        sequence_length = 7
        vocab_size = 11
        bert_hidden_size = 13
        bert_num_hidden_layers = 1
        bert_num_attention_heads = 1
        bert_intermediate_size = 4
        bert_type_vocab_size = 2
        bert_max_position_embeddings = sequence_length
        bert_encoder = networks.BertEncoder(
            vocab_size=vocab_size,
            hidden_size=bert_hidden_size,
            num_layers=bert_num_hidden_layers,
            num_attention_heads=bert_num_attention_heads,
            intermediate_size=bert_intermediate_size,
            sequence_length=sequence_length,
            max_sequence_length=bert_max_position_embeddings,
            type_vocab_size=bert_type_vocab_size)
        bert_config = configs.BertConfig(
            vocab_size,
            hidden_size=bert_hidden_size,
            num_hidden_layers=bert_num_hidden_layers,
            num_attention_heads=bert_num_attention_heads,
            intermediate_size=bert_intermediate_size,
            type_vocab_size=bert_type_vocab_size,
            max_position_embeddings=bert_max_position_embeddings)
        batch_size = 17
        edit_tags_size = 19
        bert_config.num_classes = edit_tags_size
        bert_config.query_size = 23
        bert_config.query_transformer = query_transformer

        tagger = felix_tagger.FelixTagger(bert_encoder,
                                          bert_config=bert_config,
                                          seq_length=sequence_length,
                                          use_pointing=use_pointing,
                                          is_training=is_training)

        # Create inputs.
        input_word_ids = np.random.randint(vocab_size - 1,
                                           size=(batch_size, sequence_length))
        input_mask = np.random.randint(2, size=(batch_size, sequence_length))
        input_type_ids = np.ones((batch_size, sequence_length))
        edit_tags = np.random.randint(edit_tags_size - 2,
                                      size=(batch_size, sequence_length))

        # Run the model.
        if is_training:
            output = tagger(
                [input_word_ids, input_type_ids, input_mask, edit_tags])
        else:
            output = tagger([input_word_ids, input_type_ids, input_mask])

        # Check output shapes.
        if use_pointing:
            tag_logits, pointing_logits = output
            self.assertEqual(pointing_logits.shape,
                             (batch_size, sequence_length, sequence_length))
        else:
            tag_logits = output[0]
        self.assertEqual(tag_logits.shape,
                         (batch_size, sequence_length, edit_tags_size))
Code example #9
    def __init__(self, config: model_config.VanillaLinearVAECellConfig):
        self._gumbel_softmax_label_adjustment_multiplier = (
            config.gumbel_softmax_label_adjustment_multiplier)

        vocab_embeddings_initializer = None
        if config.shared_bert_embedding:
            shared_embedding_layer = _BERT(
                config.max_seq_length,
                bert_config=configs.BertConfig(
                    **config.shared_bert_embedding_config),
                trainable=config.trainable_embedding)
        else:
            # If word_embedding_path is specified, use the embedding size of the
            # pre-trained embeddings.
            if config.word_embedding_path:
                with tf.io.gfile.GFile(config.word_embedding_path,
                                       'rb') as embedding_file:
                    word_embedding = np.load(embedding_file)
                embedding_vocab_size, embed_size = word_embedding.shape
                if config.vocab_size != embedding_vocab_size:
                    raise ValueError(
                        'Expected consistent vocab size between vocab.txt and the '
                        'embedding, found {} and {}.'.format(
                            embedding_vocab_size, config.vocab_size))
                config.embed_size = embed_size
                vocab_embeddings_initializer = (
                    tf.keras.initializers.Constant(word_embedding))
            if config.shared_embedding:
                shared_embedding_layer = _Embedding(
                    INPUT_ID_NAME,
                    config.vocab_size,
                    config.embed_size,
                    embeddings_initializer=vocab_embeddings_initializer,
                    input_length=config.max_seq_length,
                    trainable=config.trainable_embedding)
            else:
                shared_embedding_layer = None

        encoder = DualRNNEncoder(
            vocab_size=config.vocab_size,
            embed_size=config.embed_size,
            max_seq_length=config.max_seq_length,
            hidden_size=config.encoder_hidden_size,
            num_layers=config.num_ecnoder_rnn_layers,
            dropout=config.dropout,
            cell_type=config.encoder_cell_type,
            embeddings_initializer=vocab_embeddings_initializer,
            shared_embedding_layer=shared_embedding_layer,
            trainable_embedding=config.trainable_embedding)
        sampler = utils.GumbelSoftmaxSampler(config.temperature, hard=False)

        decoder = DualRNNDecoder(
            vocab_size=config.vocab_size,
            embed_size=config.embed_size,
            max_seq_length=config.max_seq_length - 1,
            hidden_size=config.decoder_hidden_size,
            # Hardcoded to 1 layer to align with the PyTorch version.
            # Otherwise, we would need to define the initial state for each
            # layer in _prepare_decoder_initial_state and change
            # _post_process_decoder_state.
            num_layers=1,
            dropout=config.dropout,
            cell_type=config.decoder_cell_type,
            embeddings_initializer=vocab_embeddings_initializer,
            shared_embedding_layer=shared_embedding_layer,
            trainable_embedding=config.trainable_embedding,
            return_state=True)
        state_updater = _VanillaStateUpdater(config.state_updater_cell_type,
                                             config.num_states, config.dropout)

        self.encoder_output_projector = _VanillaEncoderOutputProjector(
            hidden_sizes=list(config.encoder_projection_sizes),
            output_size=config.num_states,
            dropout=config.dropout)
        self.sample_post_processor = utils.MLP(
            config.sampler_post_processor_output_sizes, dropout=config.dropout)
        self.shared_embedding_layer = shared_embedding_layer

        super(VanillaLinearVAECell, self).__init__(encoder=encoder,
                                                   sampler=sampler,
                                                   decoder=decoder,
                                                   state_updater=state_updater)