Beispiel #1
0
def create_bert_model(bert_config: bert_modeling.BertConfig):
    """Builds the core BERT Keras model for the given configuration.

    Three int32 symbolic inputs with an unspecified sequence length
    (`shape=(None,)`) are created and handed to
    `bert_modeling.get_bert_model`.

    Args:
      bert_config: A `BertConfig` describing the core model.

    Returns:
      The Keras model returned by `bert_modeling.get_bert_model`, requested
      with the name "bert_model" and `float_type=tf.float32`.
    """

    def _placeholder(name):
        # Variable-length int32 input; it only acts as a placeholder.
        return tf.keras.layers.Input(shape=(None, ),
                                     dtype=tf.int32,
                                     name=name)

    word_ids = _placeholder("input_word_ids")
    mask = _placeholder("input_mask")
    type_ids = _placeholder("input_type_ids")
    return bert_modeling.get_bert_model(word_ids,
                                        mask,
                                        type_ids,
                                        config=bert_config,
                                        name="bert_model",
                                        float_type=tf.float32)
def create_bert_model(bert_config):
  """Builds the core BERT Keras model with fixed-length inputs.

  The input sequence length is read from
  `bert_config.max_position_embeddings`.

  Args:
    bert_config: A `BertConfig` describing the core model.

  Returns:
    The Keras model returned by `modeling.get_bert_model`, requested with the
    name "bert_model" and `float_type=tf.float32`.
  """
  input_shape = (bert_config.max_position_embeddings,)

  # The input layers only serve as placeholders. They are created in the
  # order: word ids, mask, type ids.
  placeholders = [
      tf.keras.layers.Input(shape=input_shape, dtype=tf.int32, name=input_name)
      for input_name in ("input_word_ids", "input_mask", "input_type_ids")
  ]
  return modeling.get_bert_model(
      *placeholders,
      config=bert_config,
      name="bert_model",
      float_type=tf.float32)
Beispiel #3
0
def classifier_model(bert_config,
                     float_type,
                     num_labels,
                     max_seq_length,
                     final_layer_initializer=None):
    """Builds a BERT classification model with the Keras functional API.

    The first output of the core BERT model (treated here as the pooled
    output) is passed through dropout and then a dense layer with
    `num_labels` units.

    Args:
      bert_config: BertConfig, the config that defines the core BERT model.
      float_type: dtype, tf.float32 or tf.bfloat16.
      num_labels: integer, the number of classes.
      max_seq_length: integer, the maximum input sequence length.
      final_layer_initializer: Initializer for the final dense layer. When
        None, a TruncatedNormal initializer with stddev
        `bert_config.initializer_range` is used.

    Returns:
      A tuple of two models:
        * the combined prediction model (words, mask, type) -> dense output
          with `num_labels` units, and
        * the BERT sub-model (words, mask, type) -> (bert_outputs).
    """

    def _seq_input(name):
        # int32 input of fixed length `max_seq_length`.
        return tf.keras.layers.Input(shape=(max_seq_length, ),
                                     dtype=tf.int32,
                                     name=name)

    word_ids = _seq_input('input_word_ids')
    mask = _seq_input('input_mask')
    type_ids = _seq_input('input_type_ids')

    bert_model = modeling.get_bert_model(word_ids,
                                         mask,
                                         type_ids,
                                         config=bert_config,
                                         float_type=float_type)

    # Fall back to the config-driven initializer only when none was supplied.
    dense_init = (final_layer_initializer
                  if final_layer_initializer is not None else
                  tf.keras.initializers.TruncatedNormal(
                      stddev=bert_config.initializer_range))

    dropped = tf.keras.layers.Dropout(
        rate=bert_config.hidden_dropout_prob)(bert_model.outputs[0])
    logits = tf.keras.layers.Dense(num_labels,
                                   kernel_initializer=dense_init,
                                   name='output',
                                   dtype=float_type)(dropped)

    model_inputs = {
        'input_word_ids': word_ids,
        'input_mask': mask,
        'input_type_ids': type_ids
    }
    classifier = tf.keras.Model(inputs=model_inputs, outputs=logits)
    return classifier, bert_model
Beispiel #4
0
def squad_model(bert_config, max_seq_length, float_type, initializer=None):
    """Builds the BERT SQuAD model together with the core BERT model.

    The core model is returned alongside the SQuAD model so that weights can
    be imported into it.

    Args:
      bert_config: BertConfig, the config that defines the core BERT model.
      max_seq_length: integer, the maximum input sequence length.
      float_type: tf.dtype, tf.float32 or tf.bfloat16.
      initializer: Initializer for weights in BertSquadLogitsLayer. When None,
        a TruncatedNormal initializer with stddev
        `bert_config.initializer_range` is used.

    Returns:
      A tuple `(squad, core_model)`: the Keras model named 'squad_model' whose
      outputs are `[unique_ids, start_logits, end_logits]`, and the core BERT
      model.
    """

    def _int_input(name, width):
        # int32 input with `width` entries per example.
        return tf.keras.layers.Input(shape=(width, ),
                                     dtype=tf.int32,
                                     name=name)

    ids_in = _int_input('unique_ids', 1)
    words_in = _int_input('input_ids', max_seq_length)
    mask_in = _int_input('input_mask', max_seq_length)
    segments_in = _int_input('segment_ids', max_seq_length)

    core_model = modeling.get_bert_model(words_in,
                                         mask_in,
                                         segments_in,
                                         config=bert_config,
                                         name='bert_model',
                                         float_type=float_type)

    if initializer is None:
        initializer = tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range)

    logits_layer = BertSquadLogitsLayer(initializer=initializer,
                                        float_type=float_type,
                                        name='squad_logits')
    # Only the second core output (the sequence output) feeds the logits
    # layer; it is expected to be (batch_size, sequence_length, num_hidden).
    start_logits, end_logits = logits_layer(core_model.outputs[1])

    model_inputs = {
        'unique_ids': ids_in,
        'input_ids': words_in,
        'input_mask': mask_in,
        'segment_ids': segments_in,
    }
    squad = tf.keras.Model(inputs=model_inputs,
                           outputs=[ids_in, start_logits, end_logits],
                           name='squad_model')
    return squad, core_model
Beispiel #5
0
def squad_model(bert_config,
                max_seq_length,
                float_type,
                initializer=None,
                hub_module_url=None):
    """Builds the BERT SQuAD model together with the core BERT model.

    The core BERT part is either loaded from TF-Hub (when `hub_module_url` is
    truthy) or built with `modeling.get_bert_model`.

    Args:
      bert_config: BertConfig, the config that defines the core BERT model.
      max_seq_length: integer, the maximum input sequence length.
      float_type: tf.dtype, tf.float32 or tf.bfloat16.
      initializer: Initializer for weights in BertSquadLogitsLayer. When None,
        a TruncatedNormal initializer with stddev
        `bert_config.initializer_range` is used.
      hub_module_url: TF-Hub path/url to the BERT module.

    Returns:
      A tuple `(squad, core_model)`: the Keras model named 'squad_model' whose
      outputs are `[unique_ids, start_logits, end_logits]`, and the core BERT
      object (a `hub.KerasLayer` when `hub_module_url` is given, otherwise the
      model returned by `modeling.get_bert_model`).
    """

    def _int_input(name, width):
        # int32 input with `width` entries per example.
        return tf.keras.layers.Input(shape=(width, ),
                                     dtype=tf.int32,
                                     name=name)

    ids_in = _int_input('unique_ids', 1)
    words_in = _int_input('input_ids', max_seq_length)
    mask_in = _int_input('input_mask', max_seq_length)
    segments_in = _int_input('segment_ids', max_seq_length)

    if not hub_module_url:
        core_model = modeling.get_bert_model(words_in,
                                             mask_in,
                                             segments_in,
                                             config=bert_config,
                                             name='bert_model',
                                             float_type=float_type)
        # Only the second core output (the sequence output) is used; it is
        # expected to be (batch_size, sequence_length, num_hidden).
        sequence_output = core_model.outputs[1]
    else:
        core_model = hub.KerasLayer(hub_module_url, trainable=True)
        _, sequence_output = core_model([words_in, mask_in, segments_in])
        # The static shape is set by hand because of a bug in TF shape
        # inference.
        # TODO(hongkuny): remove this once shape inference is correct.
        static_shape = (None, max_seq_length, bert_config.hidden_size)
        sequence_output.set_shape(static_shape)

    if initializer is None:
        initializer = tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range)

    logits_layer = BertSquadLogitsLayer(initializer=initializer,
                                        float_type=float_type,
                                        name='squad_logits')
    start_logits, end_logits = logits_layer(sequence_output)

    model_inputs = {
        'unique_ids': ids_in,
        'input_ids': words_in,
        'input_mask': mask_in,
        'segment_ids': segments_in,
    }
    squad = tf.keras.Model(inputs=model_inputs,
                           outputs=[ids_in, start_logits, end_logits],
                           name='squad_model')
    return squad, core_model
Beispiel #6
0
def pretrain_model(bert_config,
                   seq_length,
                   max_predictions_per_seq,
                   initializer=None):
    """Builds the model used for BERT pre-training.

    Args:
      bert_config: Configuration that defines the core BERT model.
      seq_length: Maximum sequence length of the training data.
      max_predictions_per_seq: Maximum number of tokens in a sequence to mask
        out and use for pretraining.
      initializer: Initializer for weights in BertPretrainLayer; forwarded
        unchanged (including None).

    Returns:
      A tuple of the pretraining model, whose single output is the result of
      BertPretrainLossAndMetricLayer, and the core BERT submodel from which
      to save weights after pretraining.
    """

    def _int_input(name, width):
        # int32 input with `width` entries per example.
        return tf.keras.layers.Input(shape=(width, ),
                                     name=name,
                                     dtype=tf.int32)

    # Inputs are created in the same order as the original implementation.
    word_ids = _int_input('input_word_ids', seq_length)
    mask = _int_input('input_mask', seq_length)
    type_ids = _int_input('input_type_ids', seq_length)
    lm_positions = _int_input('masked_lm_positions', max_predictions_per_seq)
    lm_weights = _int_input('masked_lm_weights', max_predictions_per_seq)
    sentence_labels = _int_input('next_sentence_labels', 1)
    lm_ids = _int_input('masked_lm_ids', max_predictions_per_seq)

    submodel_name = 'bert_model'
    bert_submodel = modeling.get_bert_model(word_ids,
                                            mask,
                                            type_ids,
                                            name=submodel_name,
                                            config=bert_config)
    # The first two submodel outputs are used as pooled and sequence outputs.
    pooled_output, sequence_output = bert_submodel.outputs[:2]

    # The pretrain layer is handed the layer that carries the submodel's name.
    pretrain_layer = BertPretrainLayer(
        bert_config,
        bert_submodel.get_layer(submodel_name),
        initializer=initializer,
        name='cls')
    lm_output, sentence_output = pretrain_layer(pooled_output,
                                                sequence_output,
                                                lm_positions)

    loss_layer = BertPretrainLossAndMetricLayer(bert_config)
    output_loss = loss_layer(lm_output, sentence_output, lm_ids, lm_weights,
                             sentence_labels)

    model_inputs = {
        'input_word_ids': word_ids,
        'input_mask': mask,
        'input_type_ids': type_ids,
        'masked_lm_positions': lm_positions,
        'masked_lm_ids': lm_ids,
        'masked_lm_weights': lm_weights,
        'next_sentence_labels': sentence_labels,
    }
    pretrain = tf.keras.Model(inputs=model_inputs, outputs=output_loss)
    return pretrain, bert_submodel
Beispiel #7
0
def squad_model(bert_config,
                max_seq_length,
                float_type,
                initializer=None,
                hub_module_url=None,
                use_keras_bert=False):
  """Builds the BERT SQuAD model together with the core BERT encoder.

  Three construction paths exist:
    * `use_keras_bert`: a `BertSpanLabeler` wrapped around the encoder from
      `_get_transformer_encoder`.
    * `hub_module_url`: the core BERT is loaded as a trainable
      `hub.KerasLayer`.
    * otherwise: the core BERT is built with `modeling.get_bert_model`.

  Args:
    bert_config: BertConfig, the config that defines the core BERT model.
    max_seq_length: integer, the maximum input sequence length.
    float_type: tf.dtype, tf.float32 or tf.bfloat16.
    initializer: Initializer for weights in BertSquadLogitsLayer. When None, a
      TruncatedNormal initializer with stddev `bert_config.initializer_range`
      is used.
    hub_module_url: TF-Hub path/url to the BERT module.
    use_keras_bert: Whether to use keras BERT. It cannot be True when
      'hub_module_url' is specified.

  Returns:
    A tuple of (1) a keras model that outputs start logits and end logits and
    (2) the core BERT transformer encoder.

  Raises:
    ValueError: When 'hub_module_url' is specified and 'use_keras_bert' is True.
  """
  # Reject the conflicting combination before anything is built.
  if hub_module_url and use_keras_bert:
    conflict_message = (
        'Cannot use hub_module_url and keras BERT at the same time.')
    raise ValueError(conflict_message)

  if use_keras_bert:
    encoder = _get_transformer_encoder(bert_config, max_seq_length)
    span_labeler = bert_span_labeler.BertSpanLabeler(network=encoder)
    return span_labeler, encoder

  def _seq_input(name):
    # int32 input of fixed length `max_seq_length`.
    return tf.keras.layers.Input(
        shape=(max_seq_length,), dtype=tf.int32, name=name)

  word_ids = _seq_input('input_word_ids')
  mask = _seq_input('input_mask')
  type_ids = _seq_input('input_type_ids')

  if not hub_module_url:
    core_model = modeling.get_bert_model(
        word_ids,
        mask,
        type_ids,
        config=bert_config,
        name='bert_model',
        float_type=float_type)
    # Only the second core output (the sequence output) is used; it is
    # expected to be (batch_size, sequence_length, num_hidden).
    sequence_output = core_model.outputs[1]
  else:
    core_model = hub.KerasLayer(hub_module_url, trainable=True)
    _, sequence_output = core_model([word_ids, mask, type_ids])
    # The static shape is set by hand because of a bug in TF shape inference.
    # TODO(hongkuny): remove this once shape inference is correct.
    static_shape = (None, max_seq_length, bert_config.hidden_size)
    sequence_output.set_shape(static_shape)

  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  logits_layer = BertSquadLogitsLayer(
      initializer=initializer, float_type=float_type, name='squad_logits')
  start_logits, end_logits = logits_layer(sequence_output)

  model_inputs = {
      'input_word_ids': word_ids,
      'input_mask': mask,
      'input_type_ids': type_ids,
  }
  squad = tf.keras.Model(
      inputs=model_inputs,
      outputs=[start_logits, end_logits],
      name='squad_model')
  return squad, core_model