コード例 #1
0
def squad_model(bert_config,
                max_seq_length,
                initializer=None,
                hub_module_url=None,
                hub_module_trainable=True):
  """Builds a BERT span-labeling (SQuAD) model together with its encoder.

  The encoder is either built from `bert_config` via
  `get_transformer_encoder`, or, when `hub_module_url` is given, assembled
  around a TF-Hub BERT module.

  Args:
    bert_config: BertConfig defining the core BERT model. Its
      `initializer_range` is the stddev of the default initializer.
    max_seq_length: integer, the maximum input sequence length.
    initializer: Initializer handed to the span labeler. When None, a
      TruncatedNormal initializer is created.
    hub_module_url: TF-Hub path/url to a BERT module. When empty or None, the
      encoder is built from `bert_config` instead.
    hub_module_trainable: Passed as `trainable` to the hub layer; only used
      when `hub_module_url` is set.

  Returns:
    A tuple of (1) the `BertSpanLabeler` model wrapping the encoder and
    (2) the encoder itself.
  """
  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  if hub_module_url:
    # One symbolic int32 input of length `max_seq_length` per feature.
    feature_names = ('input_word_ids', 'input_mask', 'input_type_ids')
    features = {
        feature: tf.keras.layers.Input(
            shape=(max_seq_length,), dtype=tf.int32, name=feature)
        for feature in feature_names
    }
    hub_layer = hub.KerasLayer(hub_module_url, trainable=hub_module_trainable)
    pooled_output, sequence_output = hub_layer(
        [features[feature] for feature in feature_names])
    # The hub layer yields (pooled, sequence); the wrapped encoder exposes
    # the two outputs in the opposite order.
    encoder = tf.keras.Model(
        inputs=features,
        outputs=[sequence_output, pooled_output],
        name='core_model')
  else:
    encoder = get_transformer_encoder(bert_config, max_seq_length)

  span_labeler = bert_span_labeler.BertSpanLabeler(
      network=encoder, initializer=initializer)
  return span_labeler, encoder
コード例 #2
0
def squad_model(bert_config,
                max_seq_length,
                float_type,
                initializer=None,
                hub_module_url=None):
    """Builds a BERT SQuAD model and returns it with its core BERT component.

    Without `hub_module_url`, the model is a `BertSpanLabeler` on top of an
    encoder built by `_get_transformer_encoder`. With `hub_module_url`, the
    TF-Hub module provides the sequence output, which is fed into a
    `BertSquadLogitsLayer`.

    Args:
        bert_config: BertConfig defining the core BERT model. Its
            `initializer_range` is the stddev of the default initializer and
            its `hidden_size` is used to pin the hub output shape.
        max_seq_length: integer, the maximum input sequence length.
        float_type: tf.dtype, tf.float32 or tf.bfloat16.
        initializer: Initializer for the span labeler / logits layer. When
            None, a TruncatedNormal initializer is created.
        hub_module_url: TF-Hub path/url to a BERT module.

    Returns:
        A tuple of (1) the keras model producing start and end logits and
        (2) the core BERT component: the transformer encoder when no hub
        module is used, otherwise the `hub.KerasLayer`.
    """
    if initializer is None:
        initializer = tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range)

    if not hub_module_url:
        encoder = _get_transformer_encoder(
            bert_config, max_seq_length, float_type)
        span_labeler = bert_span_labeler.BertSpanLabeler(
            network=encoder, initializer=initializer)
        return span_labeler, encoder

    # One symbolic int32 input of length `max_seq_length` per feature.
    feature_names = ('input_word_ids', 'input_mask', 'input_type_ids')
    features = {
        feature: tf.keras.layers.Input(
            shape=(max_seq_length, ), dtype=tf.int32, name=feature)
        for feature in feature_names
    }

    # The hub module is always loaded as trainable here.
    hub_layer = hub.KerasLayer(hub_module_url, trainable=True)
    _, sequence_output = hub_layer(
        [features[feature] for feature in feature_names])
    # Sets the shape manually due to a bug in TF shape inference.
    # TODO(hongkuny): remove this once shape inference is correct.
    sequence_output.set_shape(
        (None, max_seq_length, bert_config.hidden_size))

    logits_layer = BertSquadLogitsLayer(
        initializer=initializer, float_type=float_type, name='squad_logits')
    start_logits, end_logits = logits_layer(sequence_output)

    squad = tf.keras.Model(
        inputs=features,
        outputs=[start_logits, end_logits],
        name='squad_model')
    return squad, hub_layer
コード例 #3
0
  def test_bert_trainer_tensor_call(self):
    """Checks that the span labeler can be called on concrete tensors."""
    # A tiny encoder (sequence length 2) keeps the test cheap.
    encoder = networks.TransformerEncoder(
        vocab_size=100, num_layers=2, sequence_length=2)
    span_labeler = bert_span_labeler.BertSpanLabeler(encoder)

    # Two examples of length 2 for each of the three int32 inputs.
    model_inputs = [
        tf.constant(values, dtype=tf.int32)
        for values in (
            [[1, 1], [2, 2]],  # word ids
            [[1, 1], [1, 0]],  # mask
            [[1, 1], [2, 2]],  # type ids
        )
    ]

    # The outputs are not inspected: the call only has to complete without
    # raising.
    span_labeler(model_inputs)
コード例 #4
0
  def test_bert_trainer_named_compilation(self):
    """Checks that compile() accepts losses keyed by explicit output names."""
    encoder = networks.TransformerEncoder(
        vocab_size=100, num_layers=2, sequence_length=512)
    span_labeler = bert_span_labeler.BertSpanLabeler(encoder)

    # Keying the losses by name verifies that the model's outputs carry the
    # names we expect.
    losses_by_output = {
        'start_positions': 'mse',
        'end_positions': 'mse',
    }
    span_labeler.compile(optimizer='sgd', loss=losses_by_output)
コード例 #5
0
  def test_serialize_deserialize(self):
    """Checks a get_config()/from_config() round trip of the span labeler."""
    # A short sequence_length keeps the encoder small.
    encoder = networks.TransformerEncoder(
        vocab_size=100, num_layers=2, sequence_length=5)
    original_model = bert_span_labeler.BertSpanLabeler(encoder)

    # Rebuild a second model purely from the first model's config.
    restored_model = bert_span_labeler.BertSpanLabeler.from_config(
        original_model.get_config())

    # The JSON conversion is invoked only to confirm it does not raise.
    restored_model.to_json()

    # A faithful round trip yields identical configs.
    self.assertAllEqual(original_model.get_config(),
                        restored_model.get_config())
コード例 #6
0
  def test_bert_trainer(self):
    """Checks that the span labeler builds and yields correctly shaped outputs."""
    seq_len = 512
    encoder = networks.TransformerEncoder(
        vocab_size=100, num_layers=2, sequence_length=seq_len)
    span_labeler = bert_span_labeler.BertSpanLabeler(encoder)

    # Three symbolic int32 inputs (word ids, mask, type ids); the batch
    # dimension is implicit.
    symbolic_inputs = [
        tf.keras.Input(shape=(seq_len,), dtype=tf.int32) for _ in range(3)
    ]

    # Calling the model on symbolic inputs causes it to be built.
    outputs = span_labeler(symbolic_inputs)

    # There must be exactly two outputs, each of shape [batch, seq_len].
    self.assertEqual(2, len(outputs))
    for output in outputs:
      self.assertAllEqual([None, seq_len], output.shape.as_list())
コード例 #7
0
ファイル: bert_models.py プロジェクト: nj1111/models
def squad_model(bert_config,
                max_seq_length,
                float_type,
                initializer=None,
                hub_module_url=None,
                use_keras_bert=False):
  """Builds a BERT SQuAD model and returns it with its core BERT component.

  Three construction paths exist:
    * `use_keras_bert`: a `BertSpanLabeler` on top of the encoder returned by
      `_get_transformer_encoder`. On this path `float_type` and `initializer`
      are not used.
    * `hub_module_url`: a TF-Hub module supplies the sequence output.
    * otherwise: `modeling.get_bert_model` supplies the sequence output.
  The last two paths feed the sequence output into a `BertSquadLogitsLayer`.

  Args:
    bert_config: BertConfig defining the core BERT model.
    max_seq_length: integer, the maximum input sequence length.
    float_type: tf.dtype, tf.float32 or tf.bfloat16.
    initializer: Initializer for weights in BertSquadLogitsLayer. When None, a
      TruncatedNormal initializer with stddev `bert_config.initializer_range`
      is created.
    hub_module_url: TF-Hub path/url to a BERT module.
    use_keras_bert: Whether to use keras BERT. Cannot be True when
      `hub_module_url` is specified.

  Returns:
    A tuple of (1) the keras model producing start and end logits and (2) the
    core BERT component used to build it (the transformer encoder, the
    `hub.KerasLayer`, or the model from `modeling.get_bert_model`).

  Raises:
    ValueError: When 'hub_module_url' is specified and 'use_keras_bert' is True.
  """
  if use_keras_bert and hub_module_url:
    raise ValueError(
        'Cannot use hub_module_url and keras BERT at the same time.')

  if use_keras_bert:
    encoder = _get_transformer_encoder(bert_config, max_seq_length)
    span_labeler = bert_span_labeler.BertSpanLabeler(network=encoder)
    return span_labeler, encoder

  # One symbolic int32 input of length `max_seq_length` per feature.
  feature_names = ('input_word_ids', 'input_mask', 'input_type_ids')
  features = {
      feature: tf.keras.layers.Input(
          shape=(max_seq_length,), dtype=tf.int32, name=feature)
      for feature in feature_names
  }
  word_ids, mask, type_ids = (features[feature] for feature in feature_names)

  if not hub_module_url:
    core_model = modeling.get_bert_model(
        word_ids,
        mask,
        type_ids,
        config=bert_config,
        name='bert_model',
        float_type=float_type)
    # Only the sequence output is needed for SQuAD; it has dimensionality
    # (batch_size, sequence_length, num_hidden).
    sequence_output = core_model.outputs[1]
  else:
    core_model = hub.KerasLayer(hub_module_url, trainable=True)
    _, sequence_output = core_model([word_ids, mask, type_ids])
    # Sets the shape manually due to a bug in TF shape inference.
    # TODO(hongkuny): remove this once shape inference is correct.
    sequence_output.set_shape((None, max_seq_length, bert_config.hidden_size))

  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)
  logits_layer = BertSquadLogitsLayer(
      initializer=initializer, float_type=float_type, name='squad_logits')
  start_logits, end_logits = logits_layer(sequence_output)

  squad = tf.keras.Model(
      inputs=features,
      outputs=[start_logits, end_logits],
      name='squad_model')
  return squad, core_model