def create_bert_model(bert_config):
  """Creates a BERT keras core model from BERT configuration.

  Args:
    bert_config: A `BertConfig` to create the core model.

  Returns:
    A keras model.
  """
  max_seq_length = bert_config.max_position_embeddings

  # Adds input layers just as placeholders.
  input_word_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name="input_word_ids")
  input_mask = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name="input_mask")
  input_type_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name="input_type_ids")
  core_model = modeling.get_bert_model(
      input_word_ids,
      input_mask,
      input_type_ids,
      config=bert_config,
      name="bert_model",
      float_type=tf.float32)
  return core_model
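
# A minimal usage sketch for `create_bert_model`; the config path below is
# hypothetical and not part of this module:
#
#   bert_config = modeling.BertConfig.from_json_file(
#       '/path/to/bert_config.json')
#   core_model = create_bert_model(bert_config)
#   core_model.summary()
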
def classifier_model(bert_config,
                     float_type,
                     num_labels,
                     max_seq_length,
                     final_layer_initializer=None):
  """BERT classifier model in functional API style.

  Construct a Keras model for predicting `num_labels` outputs from an input
  with maximum sequence length `max_seq_length`.

  Args:
    bert_config: BertConfig, the config defines the core BERT model.
    float_type: dtype, tf.float32 or tf.bfloat16.
    num_labels: integer, the number of classes.
    max_seq_length: integer, the maximum input sequence length.
    final_layer_initializer: Initializer for the final dense layer. Defaults
      to a TruncatedNormal initializer.

  Returns:
    Combined prediction model (words, mask, type) -> (one-hot labels)
    BERT sub-model (words, mask, type) -> (bert_outputs)
  """
  input_word_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_word_ids')
  input_mask = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_mask')
  input_type_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_type_ids')
  bert_model = modeling.get_bert_model(
      input_word_ids,
      input_mask,
      input_type_ids,
      config=bert_config,
      float_type=float_type)
  pooled_output = bert_model.outputs[0]

  if final_layer_initializer is not None:
    initializer = final_layer_initializer
  else:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  output = tf.keras.layers.Dropout(
      rate=bert_config.hidden_dropout_prob)(pooled_output)
  output = tf.keras.layers.Dense(
      num_labels,
      kernel_initializer=initializer,
      name='output',
      dtype=float_type)(output)
  return tf.keras.Model(
      inputs={
          'input_word_ids': input_word_ids,
          'input_mask': input_mask,
          'input_type_ids': input_type_ids
      },
      outputs=output), bert_model
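
# A minimal usage sketch for `classifier_model`; the config path and label
# count are hypothetical. The final dense layer emits raw logits, so a
# from-logits loss is appropriate:
#
#   bert_config = modeling.BertConfig.from_json_file(
#       '/path/to/bert_config.json')
#   model, bert_model = classifier_model(
#       bert_config, tf.float32, num_labels=2, max_seq_length=128)
#   model.compile(
#       optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
#       loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
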
def squad_model(bert_config, max_seq_length, float_type, initializer=None):
  """Returns BERT SQuAD model along with core BERT model to import weights.

  Args:
    bert_config: BertConfig, the config defines the core BERT model.
    max_seq_length: integer, the maximum input sequence length.
    float_type: tf.dtype, tf.float32 or tf.bfloat16.
    initializer: Initializer for weights in BertSquadLogitsLayer.

  Returns:
    The SQuAD model and the core BERT model. The SQuAD model outputs start
    logits and end logits, each of shape [batch x sequence length].
  """
  unique_ids = tf.keras.layers.Input(
      shape=(1,), dtype=tf.int32, name='unique_ids')
  input_word_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_ids')
  input_mask = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_mask')
  input_type_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='segment_ids')
  core_model = modeling.get_bert_model(
      input_word_ids,
      input_mask,
      input_type_ids,
      config=bert_config,
      name='bert_model',
      float_type=float_type)

  # `BertSquadModel` only uses the sequence_output, which has dimensionality
  # (batch_size, sequence_length, num_hidden).
  sequence_output = core_model.outputs[1]

  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  squad_logits_layer = BertSquadLogitsLayer(
      initializer=initializer, float_type=float_type, name='squad_logits')
  start_logits, end_logits = squad_logits_layer(sequence_output)

  squad = tf.keras.Model(
      inputs={
          'unique_ids': unique_ids,
          'input_ids': input_word_ids,
          'input_mask': input_mask,
          'segment_ids': input_type_ids,
      },
      outputs=[unique_ids, start_logits, end_logits],
      name='squad_model')
  return squad, core_model
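
# A minimal usage sketch for `squad_model`; the config path is hypothetical.
# Pretrained weights are typically restored into the returned `core_model`
# before fine-tuning the SQuAD model:
#
#   bert_config = modeling.BertConfig.from_json_file(
#       '/path/to/bert_config.json')
#   squad, core_model = squad_model(
#       bert_config, max_seq_length=384, float_type=tf.float32)
#   tf.train.Checkpoint(model=core_model).restore(
#       '/path/to/bert_model.ckpt')
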
def load_pretrained_bert_model(bert_dir, max_length):
  """Creates a core BERT model and restores pretrained checkpoint weights.

  Args:
    bert_dir: Directory containing the `bert_config.json` file and the
      `bert_model.ckpt` checkpoint.
    max_length: integer, the maximum input sequence length.

  Returns:
    The core BERT keras model with weights restored from the checkpoint.
  """
  bert_model = modeling.get_bert_model(
      tf.keras.layers.Input(
          shape=(max_length,), dtype=tf.int32, name='input_word_ids'),
      tf.keras.layers.Input(
          shape=(max_length,), dtype=tf.int32, name='input_mask'),
      tf.keras.layers.Input(
          shape=(max_length,), dtype=tf.int32, name='input_type_ids'),
      config=modeling.BertConfig.from_json_file(
          os.path.join(bert_dir, 'bert_config.json')),
      float_type=tf.float32)

  # Load the pretrained model weights.
  init_checkpoint = os.path.join(bert_dir, 'bert_model.ckpt')
  checkpoint = tf.train.Checkpoint(model=bert_model)
  checkpoint.restore(init_checkpoint)
  return bert_model
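
# A minimal usage sketch for `load_pretrained_bert_model`; the directory is
# hypothetical and must contain `bert_config.json` and `bert_model.ckpt`.
# The outputs follow the (pooled_output, sequence_output) order used above:
#
#   bert_model = load_pretrained_bert_model(
#       '/path/to/uncased_L-12_H-768_A-12', max_length=128)
#   pooled_output, sequence_output = bert_model.outputs
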
def pretrain_model(bert_config,
                   seq_length,
                   max_predictions_per_seq,
                   initializer=None):
  """Returns model to be used for pre-training.

  Args:
    bert_config: Configuration that defines the core BERT model.
    seq_length: Maximum sequence length of the training data.
    max_predictions_per_seq: Maximum number of tokens in sequence to mask out
      and use for pretraining.
    initializer: Initializer for weights in BertPretrainLayer.

  Returns:
    Pretraining model as well as core BERT submodel from which to save
    weights after pretraining.
  """
  input_word_ids = tf.keras.layers.Input(
      shape=(seq_length,), name='input_word_ids', dtype=tf.int32)
  input_mask = tf.keras.layers.Input(
      shape=(seq_length,), name='input_mask', dtype=tf.int32)
  input_type_ids = tf.keras.layers.Input(
      shape=(seq_length,), name='input_type_ids', dtype=tf.int32)
  masked_lm_positions = tf.keras.layers.Input(
      shape=(max_predictions_per_seq,),
      name='masked_lm_positions',
      dtype=tf.int32)
  masked_lm_weights = tf.keras.layers.Input(
      shape=(max_predictions_per_seq,),
      name='masked_lm_weights',
      dtype=tf.int32)
  next_sentence_labels = tf.keras.layers.Input(
      shape=(1,), name='next_sentence_labels', dtype=tf.int32)
  masked_lm_ids = tf.keras.layers.Input(
      shape=(max_predictions_per_seq,), name='masked_lm_ids', dtype=tf.int32)

  bert_submodel_name = 'bert_core_layer'
  bert_submodel = modeling.get_bert_model(
      input_word_ids,
      input_mask,
      input_type_ids,
      name=bert_submodel_name,
      config=bert_config)
  pooled_output = bert_submodel.outputs[0]
  sequence_output = bert_submodel.outputs[1]

  pretrain_layer = BertPretrainLayer(
      bert_config,
      bert_submodel.get_layer(bert_submodel_name),
      initializer=initializer)
  lm_output, sentence_output = pretrain_layer(pooled_output, sequence_output,
                                              masked_lm_positions)

  pretrain_loss_layer = BertPretrainLossAndMetricLayer(bert_config)
  output_loss = pretrain_loss_layer(lm_output, sentence_output, masked_lm_ids,
                                    masked_lm_weights, next_sentence_labels)

  return tf.keras.Model(
      inputs={
          'input_word_ids': input_word_ids,
          'input_mask': input_mask,
          'input_type_ids': input_type_ids,
          'masked_lm_positions': masked_lm_positions,
          'masked_lm_ids': masked_lm_ids,
          'masked_lm_weights': masked_lm_weights,
          'next_sentence_labels': next_sentence_labels,
      },
      outputs=output_loss), bert_submodel
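
# A minimal usage sketch for `pretrain_model`; the config path and lengths
# are hypothetical, and the input dataset is assumed to yield feature dicts
# keyed by the input names above. Since the model's output is already the
# combined pretraining loss, a pass-through loss is one way to compile it:
#
#   bert_config = modeling.BertConfig.from_json_file(
#       '/path/to/bert_config.json')
#   model, bert_submodel = pretrain_model(
#       bert_config, seq_length=128, max_predictions_per_seq=20)
#   model.compile(
#       optimizer=tf.keras.optimizers.Adam(),
#       loss=lambda unused_labels, loss_output: loss_output)
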