Example 1
def siamese_model(bert_config,
                  num_labels,
                  siamese_type='classify',
                  pooling_type='CLS'):
    encoder = bert_models.get_transformer_encoder(bert_config)
    bert_siamese = BertSiamese(encoder=encoder,
                               pooling_type=pooling_type,
                               dropout_rate=bert_config.hidden_dropout_prob,
                               norm=(siamese_type == 'ams'))

    # Uncomment the following lines to swap in a simple LSTM baseline when
    # debugging the network:
    # bert_siamese = encoder = LSTMSiamese(
    #     bert_config.vocab_size, bert_config.hidden_size,
    #     norm=(siamese_type == 'ams'))
    if siamese_type == 'classify':
        bert_siamese = SiameseClassifierModel(
            bert_siamese,
            num_labels=num_labels,
            dropout_rate=bert_config.hidden_dropout_prob)
    elif siamese_type == 'triplet':
        bert_siamese = SiameseTripletModel(bert_siamese)
    elif siamese_type == 'contrastive':
        bert_siamese = SiameseContrastiveModel(bert_siamese)
    elif siamese_type == 'ams':
        bert_siamese = SiameseAMSModel(bert_siamese)
    else:
        raise ValueError(f'Siamese type {siamese_type} not supported!!')
    return bert_siamese, encoder
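
A minimal usage sketch for the factory above, assuming `siamese_model` and its module-local dependencies (`BertSiamese`, the `Siamese*` heads, `bert_models`) are importable, and that `bert_configs` is the TF Model Garden BERT configs module; the config path and label count are placeholders.

from official.nlp.bert import configs as bert_configs

bert_config = bert_configs.BertConfig.from_json_file('/path/to/bert_config.json')

# The factory returns the task head plus the bare encoder; pretrained BERT
# weights are typically restored into the encoder before training.
siamese, encoder = siamese_model(bert_config,
                                 num_labels=2,
                                 siamese_type='classify',
                                 pooling_type='CLS')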
Example 2
def create_albert_model(
        albert_config: bert_modeling.AlbertConfig
) -> Tuple[tf.keras.Model, tf.keras.Model]:
    """Creates an ALBERT keras core model from ALBERT configuration.

  Args:
    albert_config: An `AlbertConfig` to create the core model.

  Returns:
    A tuple of (keras core model, ALBERT transformer encoder).
  """
    # Adds input layers just as placeholders.
    input_word_ids = tf.keras.layers.Input(shape=(None, ),
                                           dtype=tf.int32,
                                           name="input_word_ids")
    input_mask = tf.keras.layers.Input(shape=(None, ),
                                       dtype=tf.int32,
                                       name="input_mask")
    input_type_ids = tf.keras.layers.Input(shape=(None, ),
                                           dtype=tf.int32,
                                           name="input_type_ids")
    transformer_encoder = bert_models.get_transformer_encoder(
        albert_config, sequence_length=None, float_dtype=tf.float32)
    sequence_output, pooled_output = transformer_encoder(
        [input_word_ids, input_mask, input_type_ids])
    # To keep consistent with legacy hub modules, the outputs are
    # "pooled_output" and "sequence_output".
    return tf.keras.Model(inputs=[input_word_ids, input_mask, input_type_ids],
                          outputs=[pooled_output,
                                   sequence_output]), transformer_encoder
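
A sketch of building the ALBERT core model above, assuming `create_albert_model` and `bert_modeling` are importable from the snippet's module; the config path is a placeholder.

albert_config = bert_modeling.AlbertConfig.from_json_file(
    '/path/to/albert_config.json')

# Inputs use shape=(None,), so one instance handles variable sequence lengths;
# the encoder is returned separately so pretrained weights can be restored
# into it.
albert_model, albert_encoder = create_albert_model(albert_config)
albert_model.summary()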
Example 3
def unified_model(bert_config,
                  num_labels,
                  max_seq_length,
                  initializer=None):
  """BERT unified model in functional API style.

  Construct a Keras model for predicting `num_labels` outputs from an input
  with maximum sequence length `max_seq_length`, as well as the core BERT
  encoder used to restore pretrained weights.

  Args:
    bert_config: BertConfig or AlbertConfig, the config defines the core
      BERT or ALBERT model.
    num_labels: integer, the number of classes.
    max_seq_length: integer, the maximum input sequence length.
    initializer: Initializer for the final dense layer. Defaults to a
      TruncatedNormal initializer.

  Returns:
    Combined prediction model (words, mask, type) -> (one-hot labels)
    BERT sub-model (words, mask, type) -> (bert_outputs)
  """
  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  bert_encoder = bert_models.get_transformer_encoder(bert_config, max_seq_length)
  return bert_unified_labeler.BertUnifiedLabeler(
      bert_encoder,
      num_classes=num_labels,
      initializer=initializer,
      dropout_rate=bert_config.hidden_dropout_prob), bert_encoder
Example 4
  def __init__(self,
               bert_config,
               name_to_features=None,
               name="serving_model"):
    super(BertServing, self).__init__(name=name)
    self.encoder = bert_models.get_transformer_encoder(
        bert_config, sequence_length=None)
    self.name_to_features = name_to_features
Example 5
def rewrite_weights(orig_ckpt, orig_config, output_ckpt,
                    pooler_initialization):
    """Remove vestigial pooler weights."""
    # read original checkpoint
    print(f"building model from config: [{orig_config}] ...")
    bert_config = bert_configs.BertConfig.from_json_file(orig_config)
    m = bert_models.get_transformer_encoder(bert_config=bert_config,
                                            sequence_length=1,
                                            output_range=1)

    print("...successfully built model.")

    print(f"\nloading model from prefix: [{orig_ckpt}] ...")
    checkpoint = tf.train.Checkpoint(model=m)
    checkpoint.restore(orig_ckpt).assert_existing_objects_matched()
    print("...successfully loaded model.")

    orig_pooler_weights, orig_pooler_bias = m.pooler_layer.weights

    print("\nupdating weights...")

    # update pooler bias
    print("  ...pooler bias with zeros.")
    new_pooler_bias = tf.constant(0.,
                                  dtype=orig_pooler_bias.dtype,
                                  shape=orig_pooler_bias.shape)

    # update pooler weights
    pooler_shape = orig_pooler_weights.shape
    pooler_dtype = orig_pooler_weights.dtype
    if pooler_initialization == "identity":
        print("  ...pooler weights with identity.")
        new_pooler_weights = tf.eye(pooler_shape[0], dtype=pooler_dtype)
    elif pooler_initialization == "truncated_normal":
        stddev = bert_config.initializer_range
        print("  ...pooler weights with truncated_normal "
              "(stddev={}).".format(stddev))
        new_pooler_weights = tf.random.truncated_normal(shape=pooler_shape,
                                                        mean=0.,
                                                        stddev=stddev,
                                                        dtype=pooler_dtype)
    else:
        raise ValueError(pooler_initialization)

    m.pooler_layer.set_weights([new_pooler_weights, new_pooler_bias])
    print("...weights updated!")

    print("\nsaving checkpoint...")
    new_checkpoint = tf.train.Checkpoint(model=m)
    # access save_counter so it is created before saving the checkpoint.
    new_checkpoint.save_counter  # pylint: disable=pointless-statement
    new_checkpoint.write(output_ckpt)
    print("... saved!")

    print(f"\nsurgery successful! new model at: [{output_ckpt}]")
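
A sketch of invoking the checkpoint surgery above; every path is a placeholder, and `pooler_initialization` must be one of the two strings the function checks for.

rewrite_weights(
    orig_ckpt='/path/to/orig/bert_model.ckpt',
    orig_config='/path/to/orig/bert_config.json',
    output_ckpt='/path/to/new/bert_model.ckpt',
    pooler_initialization='identity')  # or 'truncated_normal'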
Example 6
def ner_model(bert_config, num_labels, use_crf=False):
    encoder_network = bert_models.get_transformer_encoder(bert_config)

    if use_crf:
        model = BertNERCRFModel(encoder_network, num_labels)
    else:
        model = models.BertTokenClassifier(
            network=encoder_network,
            num_classes=num_labels,
            initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
            dropout_rate=bert_config.hidden_dropout_prob,
            output='logits')
    return model, encoder_network
Example 7
  def __init__(self,
               batch_size,
               config,
               bert_config,
               training,
               verbose=False):
    super(Model, self).__init__()
    self.config = config
    self.bert_encoder = bert_models.get_transformer_encoder(
        bert_config, sequence_length=self.config["max_num_wordpieces"])
    self.application_score_layer = ApplicationScoreLayer(config)
    self.training = training
    self.batch_size = batch_size
Example 8
def build_model(bert_dir):
    max_seq_len = 384

    bert_config = BertConfig.from_json_file(os.path.join(bert_dir, 'bert_config.json'))
    bert_encoder = get_transformer_encoder(bert_config, max_seq_len)

    input_ids = tf.keras.layers.Input(shape=(max_seq_len,), dtype=tf.int32, name='input_ids')
    input_mask = tf.keras.layers.Input(shape=(max_seq_len,), dtype=tf.int32, name='input_mask')
    segment_ids = tf.keras.layers.Input(shape=(max_seq_len,), dtype=tf.int32, name='segment_ids')

    bert_inputs = [input_ids, input_mask, segment_ids]
    bert_sequence_output, bert_pooled_output = bert_encoder(bert_inputs)

    out = Dense(1, activation='sigmoid', name='out')(bert_pooled_output)
    return Model(inputs=bert_inputs, outputs=[out])
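
A sketch of training the binary classifier returned by `build_model`, assuming the snippet's imports (`tf`, `Dense`, `Model`, `BertConfig`, `get_transformer_encoder`) are in scope; the BERT directory is a placeholder and the batch below is dummy data.

import numpy as np
import tensorflow as tf

model = build_model('/path/to/uncased_bert_dir')  # placeholder directory

# A single sigmoid unit, so binary cross-entropy is the natural loss.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Dummy batch with the fixed 384-token layout expected by the input layers.
word_ids = np.zeros((8, 384), dtype=np.int32)
mask = np.ones((8, 384), dtype=np.int32)
segment_ids = np.zeros((8, 384), dtype=np.int32)
labels = np.zeros((8, 1), dtype=np.float32)
model.fit([word_ids, mask, segment_ids], labels, epochs=1)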
Example 9
    def __init__(self, max_seq_length: int, bert_config: configs.BertConfig,
                 trainable: bool):
        """BERT class constructor.

    Args:
      max_seq_length: the maximum input sequence length.
      bert_config: Configuration for a BERT model.
      trainable: whether the model is trainable.
    """

        super(_BERT, self).__init__()

        self.bert_model = bert_models.get_transformer_encoder(
            bert_config, max_seq_length)
        self._trainable = trainable
Example 10
def squad_model(bert_config,
                max_seq_length,
                initializer=None,
                hub_module_url=None,
                hub_module_trainable=True):
  """Returns BERT Squad model along with core BERT model to import weights.

  Args:
    bert_config: BertConfig, the config defines the core BERT model.
    max_seq_length: integer, the maximum input sequence length.
    initializer: Initializer for the final dense layer in the span labeler.
      Defaults to a TruncatedNormal initializer.
    hub_module_url: TF-Hub path/url to the BERT module.
    hub_module_trainable: True to fine-tune layers in the hub module.

  Returns:
    A tuple of (1) keras model that outputs start logits and end logits and
    (2) the core BERT transformer encoder.
  """
  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)
  if not hub_module_url:
    bert_encoder = bert_models.get_transformer_encoder(bert_config,
                                                       max_seq_length)
    return models.BertSpanLabeler(
        network=bert_encoder, initializer=initializer), bert_encoder

  input_word_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_word_ids')
  input_mask = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_mask')
  input_type_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_type_ids')
  core_model = hub.KerasLayer(hub_module_url, trainable=hub_module_trainable)
  pooled_output, sequence_output = core_model(
      [input_word_ids, input_mask, input_type_ids])
  bert_encoder = tf.keras.Model(
      inputs={
          'input_word_ids': input_word_ids,
          'input_mask': input_mask,
          'input_type_ids': input_type_ids,
      },
      outputs=[sequence_output, pooled_output],
      name='core_model')
  return models.BertSpanLabeler(
      network=bert_encoder, initializer=initializer), bert_encoder
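
The second element of the returned tuple exists so pretrained weights can be loaded into the core encoder before fine-tuning; a sketch of that pattern follows, assuming the TF Model Garden configs module is available and using a placeholder checkpoint path.

import tensorflow as tf
from official.nlp.bert import configs as bert_configs

bert_config = bert_configs.BertConfig.from_json_file('/path/to/bert_config.json')
span_labeler, bert_encoder = squad_model(bert_config, max_seq_length=384)

# Restore pretrained BERT weights into the core encoder only; the span-labeling
# head on top stays freshly initialized.
checkpoint = tf.train.Checkpoint(model=bert_encoder)
checkpoint.restore('/path/to/bert_model.ckpt').assert_existing_objects_matched()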
Example 11
def bert_model(
    num_classes: int,
    max_seq_length: int,
    bert_config: configs.BertConfig) -> Tuple[tf.keras.Model, tf.keras.Model]:
  """BERT classifier model in functional API style.

  Construct a Keras model for predicting `num_labels` outputs from an input with
  maximum sequence length `max_seq_length`.

  Args:
    num_classes: (int) the number of classes.
    max_seq_length: (int) the maximum input sequence length.
    bert_config: (BertConfig) Configuration for a BERT model.

  Returns:
    Combined prediction model (words, mask, type) -> (one-hot labels)
    BERT sub-model (words, mask, type) -> (bert_outputs)
  """
  # Defines initializer and encoder.
  final_layer_initializer = tf.keras.initializers.TruncatedNormal(
      stddev=bert_config.initializer_range)
  bert_encoder = bert_models.get_transformer_encoder(
      bert_config, max_seq_length, output_range=1)

  # Build model.
  inputs = bert_encoder.inputs
  _, cls_output = bert_encoder(inputs)
  cls_output = tf.keras.layers.Dropout(rate=bert_config.hidden_dropout_prob)(
      cls_output)

  # Build output.
  outputs = tf.keras.layers.Dense(
      num_classes,
      activation=None,
      kernel_initializer=final_layer_initializer,
      name='predictions/transform/logits')(
          cls_output)

  # Construct model.
  bert_classifier = tf.keras.Model(inputs=inputs, outputs=outputs)

  return bert_classifier, bert_encoder
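
A quick shape check for the classifier above, assuming `bert_model` and the `configs`/`bert_models` modules are importable; the tiny config values are arbitrary placeholders chosen so the example builds quickly.

import tensorflow as tf
from official.nlp.bert import configs

tiny_config = configs.BertConfig(vocab_size=100,
                                 hidden_size=16,
                                 num_hidden_layers=2,
                                 num_attention_heads=2,
                                 intermediate_size=32)
classifier, encoder = bert_model(num_classes=3,
                                 max_seq_length=8,
                                 bert_config=tiny_config)

# Three int32 tensors of shape (batch, max_seq_length) in, logits of shape
# (batch, num_classes) out.
word_ids = tf.zeros((2, 8), dtype=tf.int32)
mask = tf.ones((2, 8), dtype=tf.int32)
type_ids = tf.zeros((2, 8), dtype=tf.int32)
print(classifier([word_ids, mask, type_ids]).shape)  # (2, 3)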
Example 12
def pretrain_model(electra_config,
                   seq_length,
                   max_predictions_per_seq,
                   initializer=None):
  """Returns model to be used for pre-training.

  Args:
      electra_config: ElectraConfig that defines the generator and
        discriminator BERT models.
      seq_length: Maximum sequence length of the training data.
      max_predictions_per_seq: Maximum number of tokens in sequence to mask out
        and use for pretraining.
      initializer: Initializer for weights in BertPretrainer.

  Returns:
      Pretraining model as well as core BERT submodel from which to save
      weights after pretraining.
  """
  input_word_ids = tf.keras.layers.Input(
      shape=(seq_length,), name='input_word_ids', dtype=tf.int32)
  input_mask = tf.keras.layers.Input(
      shape=(seq_length,), name='input_mask', dtype=tf.int32)
  input_type_ids = tf.keras.layers.Input(
      shape=(seq_length,), name='input_type_ids', dtype=tf.int32)
  masked_lm_positions = tf.keras.layers.Input(
      shape=(max_predictions_per_seq,),
      name='masked_lm_positions',
      dtype=tf.int32)
  masked_lm_ids = tf.keras.layers.Input(
      shape=(max_predictions_per_seq,), name='masked_lm_ids', dtype=tf.int32)
  masked_lm_weights = tf.keras.layers.Input(
      shape=(max_predictions_per_seq,),
      name='masked_lm_weights',
      dtype=tf.int32)
  gen_encoder = bert_models.get_transformer_encoder(
      electraconfigs.ElectraConfig.get_generator_bert(electra_config),
      seq_length)
  discrim_encoder = bert_models.get_transformer_encoder(
      electraconfigs.ElectraConfig.get_discriminator_bert(electra_config),
      seq_length)
  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=electra_config.initializer_range)
  pretrainer_model = models.ElectraPretrainer(
      network=gen_encoder,
      discriminator=discrim_encoder,
      num_classes=2,
      num_token_predictions=max_predictions_per_seq,
      initializer=initializer,
      output='predictions')

  lm_output, discrim_output, discrim_labels = pretrainer_model(
      [input_word_ids, input_mask, input_type_ids, masked_lm_positions])

  pretrain_loss_layer = ElectraPretrainLossAndMetricLayer(
      vocab_size=electra_config.vocab_size,
      discrim_rate=electra_config.discrim_rate)
  output_loss = pretrain_loss_layer(lm_output, masked_lm_ids, masked_lm_weights,
                                    discrim_output, discrim_labels, input_mask)
  keras_model = tf.keras.Model(
      inputs={
          'input_word_ids': input_word_ids,
          'input_mask': input_mask,
          'input_type_ids': input_type_ids,
          'masked_lm_positions': masked_lm_positions,
          'masked_lm_ids': masked_lm_ids,
          'masked_lm_weights': masked_lm_weights,
      },
      outputs=output_loss)
  return keras_model, discrim_encoder
Example 13
def classifier_model(bert_config,
                     num_labels,
                     max_seq_length,
                     final_layer_initializer=None,
                     hub_module_url=None,
                     hub_module_trainable=True):
  """BERT classifier model in functional API style.

  Construct a Keras model for predicting `num_labels` outputs from an input with
  maximum sequence length `max_seq_length`.

  Args:
    bert_config: BertConfig or AlbertConfig, the config defines the core BERT or
      ALBERT model.
    num_labels: integer, the number of classes.
    max_seq_length: integer, the maximum input sequence length.
    final_layer_initializer: Initializer for the final dense layer. Defaults
      to a TruncatedNormal initializer.
    hub_module_url: TF-Hub path/url to the BERT module.
    hub_module_trainable: True to fine-tune layers in the hub module.

  Returns:
    Combined prediction model (words, mask, type) -> (one-hot labels)
    BERT sub-model (words, mask, type) -> (bert_outputs)
  """
  if final_layer_initializer is not None:
    initializer = final_layer_initializer
  else:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  if not hub_module_url:
    bert_encoder = bert_models.get_transformer_encoder(bert_config,
                                                       max_seq_length)
    return models.BertClassifier(
        bert_encoder,
        num_classes=num_labels,
        dropout_rate=bert_config.hidden_dropout_prob,
        initializer=initializer), bert_encoder

  input_word_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_word_ids')
  input_mask = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_mask')
  input_type_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_type_ids')
  bert_model = hub.KerasLayer(
      hub_module_url, trainable=hub_module_trainable)
  pooled_output, _ = bert_model([input_word_ids, input_mask, input_type_ids])
  output = tf.keras.layers.Dropout(rate=bert_config.hidden_dropout_prob)(
      pooled_output)

  output = tf.keras.layers.Dense(
      num_labels, kernel_initializer=initializer, name='output')(
          output)
  return tf.keras.Model(
      inputs={
          'input_word_ids': input_word_ids,
          'input_mask': input_mask,
          'input_type_ids': input_type_ids
      },
      outputs=output), bert_model
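
A sketch of both branches of `classifier_model`, assuming it and a `bert_configs` module are importable; the config path and the TF-Hub URL are placeholders.

bert_config = bert_configs.BertConfig.from_json_file('/path/to/bert_config.json')

# Without a hub module: a BertClassifier head plus the bare encoder, which is
# the object pretrained weights are restored into.
classifier, bert_encoder = classifier_model(
    bert_config, num_labels=2, max_seq_length=128)

# With a hub module (placeholder URL): a functional model whose inputs are a
# dict keyed by 'input_word_ids', 'input_mask' and 'input_type_ids', with the
# hub.KerasLayer itself as the second return value.
hub_classifier, hub_layer = classifier_model(
    bert_config,
    num_labels=2,
    max_seq_length=128,
    hub_module_url='https://tfhub.dev/<placeholder-bert-module>')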