Code example #1
def create_bert_model(bert_config):
  """Creates a BERT keras core model from BERT configuration.

  Args:
    bert_config: A `BertConfig` to create the core model.
  Returns:
    A keras model.
  """
  max_seq_length = bert_config.max_position_embeddings

  # Adds input layers just as placeholders.
  input_word_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name="input_word_ids")
  input_mask = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name="input_mask")
  input_type_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name="input_type_ids")
  core_model = modeling.get_bert_model(
      input_word_ids,
      input_mask,
      input_type_ids,
      config=bert_config,
      name="bert_model",
      float_type=tf.float32)
  return core_model
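
All of the snippets on this page assume `import tensorflow as tf` and a `modeling` module providing `get_bert_model` and `BertConfig`; in the TensorFlow Models garden this BERT code lives in a module such as `official.bert.modeling`, but the exact import path is an assumption here, not part of the snippets. A minimal usage sketch for `create_bert_model`:

import tensorflow as tf
from official.bert import modeling  # assumed import path, see note above

# 'bert_config.json' is a hypothetical path to a standard BERT config file.
bert_config = modeling.BertConfig.from_json_file('bert_config.json')
core_model = create_bert_model(bert_config)
core_model.summary()  # inputs: input_word_ids, input_mask, input_type_ids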
Code example #2
File: bert_models.py Project: qa276390/tf-models
def classifier_model(bert_config,
                     float_type,
                     num_labels,
                     max_seq_length,
                     final_layer_initializer=None):
    """BERT classifier model in functional API style.

  Construct a Keras model for predicting `num_labels` outputs from an input with
  maximum sequence length `max_seq_length`.

  Args:
    bert_config: BertConfig, the config that defines the core BERT model.
    float_type: dtype, tf.float32 or tf.bfloat16.
    num_labels: integer, the number of classes.
    max_seq_length: integer, the maximum input sequence length.
    final_layer_initializer: Initializer for the final dense layer. Defaults to
      a TruncatedNormal initializer.

  Returns:
    Combined prediction model (words, mask, type) -> (one-hot labels)
    BERT sub-model (words, mask, type) -> (bert_outputs)
  """
    input_word_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                           dtype=tf.int32,
                                           name='input_word_ids')
    input_mask = tf.keras.layers.Input(shape=(max_seq_length, ),
                                       dtype=tf.int32,
                                       name='input_mask')
    input_type_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                           dtype=tf.int32,
                                           name='input_type_ids')
    bert_model = modeling.get_bert_model(input_word_ids,
                                         input_mask,
                                         input_type_ids,
                                         config=bert_config,
                                         float_type=float_type)
    pooled_output = bert_model.outputs[0]
    if final_layer_initializer is not None:
        initializer = final_layer_initializer
    else:
        initializer = tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range)

    output = tf.keras.layers.Dropout(
        rate=bert_config.hidden_dropout_prob)(pooled_output)
    output = tf.keras.layers.Dense(num_labels,
                                   kernel_initializer=initializer,
                                   name='output',
                                   dtype=float_type)(output)
    return tf.keras.Model(inputs={
        'input_word_ids': input_word_ids,
        'input_mask': input_mask,
        'input_type_ids': input_type_ids
    },
                          outputs=output), bert_model
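
A hedged usage sketch for `classifier_model`; the config path, label count, and sequence length below are placeholders, not values from the original code:

bert_config = modeling.BertConfig.from_json_file('bert_config.json')  # hypothetical path
classifier, bert_core = classifier_model(
    bert_config,
    float_type=tf.float32,
    num_labels=2,  # e.g. a binary sentence classification task
    max_seq_length=128)
# `classifier` is the model trained end to end; `bert_core` is returned so
# pretrained BERT weights can be restored into it before fine-tuning.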
Code example #3
File: bert_models.py Project: qa276390/tf-models
def squad_model(bert_config, max_seq_length, float_type, initializer=None):
    """Returns BERT Squad model along with core BERT model to import weights.

  Args:
    bert_config: BertConfig, the config that defines the core BERT model.
    max_seq_length: integer, the maximum input sequence length.
    float_type: tf.dtype, tf.float32 or tf.bfloat16.
    initializer: Initializer for weights in BertSquadLogitsLayer.

  Returns:
    Two Keras models: the SQuAD model, whose outputs are the unique ids plus
    start and end logits of shape [batch x sequence length], and the core BERT
    model used to import pretrained weights.
  """
    unique_ids = tf.keras.layers.Input(shape=(1, ),
                                       dtype=tf.int32,
                                       name='unique_ids')
    input_word_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                           dtype=tf.int32,
                                           name='input_ids')
    input_mask = tf.keras.layers.Input(shape=(max_seq_length, ),
                                       dtype=tf.int32,
                                       name='input_mask')
    input_type_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                           dtype=tf.int32,
                                           name='segment_ids')

    core_model = modeling.get_bert_model(input_word_ids,
                                         input_mask,
                                         input_type_ids,
                                         config=bert_config,
                                         name='bert_model',
                                         float_type=float_type)

    # `BertSquadModel` only uses the sequence_output, which
    # has dimensionality (batch_size, sequence_length, num_hidden).
    sequence_output = core_model.outputs[1]

    if initializer is None:
        initializer = tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range)
    squad_logits_layer = BertSquadLogitsLayer(initializer=initializer,
                                              float_type=float_type,
                                              name='squad_logits')
    start_logits, end_logits = squad_logits_layer(sequence_output)

    squad = tf.keras.Model(inputs={
        'unique_ids': unique_ids,
        'input_ids': input_word_ids,
        'input_mask': input_mask,
        'segment_ids': input_type_ids,
    },
                           outputs=[unique_ids, start_logits, end_logits],
                           name='squad_model')
    return squad, core_model
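
A usage sketch for `squad_model` under the same assumptions; 384 is a common SQuAD sequence length, used here only as a placeholder:

squad, core_model = squad_model(bert_config, max_seq_length=384, float_type=tf.float32)
# Restoring pretrained weights into `core_model` (as in code example #4 below)
# also initializes the shared BERT layers inside `squad`.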
Code example #4
def load_pretrained_bert_model(bert_dir, max_length):
    bert_model = modeling.get_bert_model(
        tf.keras.layers.Input(
            shape=(max_length,), dtype=tf.int32, name='input_word_ids'),
        tf.keras.layers.Input(
            shape=(max_length,), dtype=tf.int32, name='input_mask'),
        tf.keras.layers.Input(
            shape=(max_length,), dtype=tf.int32, name='input_type_ids'),
        config=modeling.BertConfig.from_json_file(os.path.join(bert_dir, 'bert_config.json')),
        float_type=tf.float32)

    # Load the pretrained weights from the checkpoint into the model.
    init_checkpoint = os.path.join(bert_dir, 'bert_model.ckpt')
    checkpoint = tf.train.Checkpoint(model=bert_model)
    checkpoint.restore(init_checkpoint)

    return bert_model
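
A usage sketch; the directory below is a hypothetical path to a standard pretrained BERT release containing `bert_config.json` and the `bert_model.ckpt` checkpoint files:

bert_model = load_pretrained_bert_model('/path/to/uncased_L-12_H-768_A-12', max_length=128)
pooled_output, sequence_output = bert_model.outputs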
Code example #5
File: bert_models.py Project: qa276390/tf-models
def pretrain_model(bert_config,
                   seq_length,
                   max_predictions_per_seq,
                   initializer=None):
    """Returns model to be used for pre-training.

  Args:
      bert_config: Configuration that defines the core BERT model.
      seq_length: Maximum sequence length of the training data.
      max_predictions_per_seq: Maximum number of tokens in sequence to mask out
        and use for pretraining.
      initializer: Initializer for weights in BertPretrainLayer.

  Returns:
      Pretraining model as well as core BERT submodel from which to save
      weights after pretraining.
  """

    input_word_ids = tf.keras.layers.Input(shape=(seq_length, ),
                                           name='input_word_ids',
                                           dtype=tf.int32)
    input_mask = tf.keras.layers.Input(shape=(seq_length, ),
                                       name='input_mask',
                                       dtype=tf.int32)
    input_type_ids = tf.keras.layers.Input(shape=(seq_length, ),
                                           name='input_type_ids',
                                           dtype=tf.int32)
    masked_lm_positions = tf.keras.layers.Input(
        shape=(max_predictions_per_seq, ),
        name='masked_lm_positions',
        dtype=tf.int32)
    masked_lm_weights = tf.keras.layers.Input(
        shape=(max_predictions_per_seq, ),
        name='masked_lm_weights',
        dtype=tf.int32)
    next_sentence_labels = tf.keras.layers.Input(shape=(1, ),
                                                 name='next_sentence_labels',
                                                 dtype=tf.int32)
    masked_lm_ids = tf.keras.layers.Input(shape=(max_predictions_per_seq, ),
                                          name='masked_lm_ids',
                                          dtype=tf.int32)

    bert_submodel_name = 'bert_core_layer'
    bert_submodel = modeling.get_bert_model(input_word_ids,
                                            input_mask,
                                            input_type_ids,
                                            name=bert_submodel_name,
                                            config=bert_config)
    pooled_output = bert_submodel.outputs[0]
    sequence_output = bert_submodel.outputs[1]

    pretrain_layer = BertPretrainLayer(
        bert_config,
        bert_submodel.get_layer(bert_submodel_name),
        initializer=initializer)
    lm_output, sentence_output = pretrain_layer(pooled_output, sequence_output,
                                                masked_lm_positions)

    pretrain_loss_layer = BertPretrainLossAndMetricLayer(bert_config)
    output_loss = pretrain_loss_layer(lm_output, sentence_output,
                                      masked_lm_ids, masked_lm_weights,
                                      next_sentence_labels)

    return tf.keras.Model(inputs={
        'input_word_ids': input_word_ids,
        'input_mask': input_mask,
        'input_type_ids': input_type_ids,
        'masked_lm_positions': masked_lm_positions,
        'masked_lm_ids': masked_lm_ids,
        'masked_lm_weights': masked_lm_weights,
        'next_sentence_labels': next_sentence_labels,
    },
                          outputs=output_loss), bert_submodel
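
A usage sketch for `pretrain_model`; the sequence length and masking budget are placeholders chosen to match typical BERT pretraining setups:

pretrain, bert_submodel = pretrain_model(
    bert_config, seq_length=128, max_predictions_per_seq=20)
pretrain.summary()
# The model's single output is the combined pretraining loss. After
# pretraining, weights are saved from `bert_submodel` so they can be restored
# into the fine-tuning models above.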