# NOTE: imports assume the TF Model Garden layout in use when these snippets
# were written; exact module paths may differ across versions.
import tensorflow as tf
import tensorflow_hub as hub

from official.modeling import tf_utils
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier


def classifier_model(bert_config,
                     num_labels,
                     max_seq_length,
                     final_layer_initializer=None,
                     hub_module_url=None):
  """BERT classifier model in functional API style.

  Construct a Keras model for predicting `num_labels` outputs from an input
  with maximum sequence length `max_seq_length`.

  Args:
    bert_config: BertConfig or AlbertConfig, the config defines the core BERT
      or ALBERT model.
    num_labels: integer, the number of classes.
    max_seq_length: integer, the maximum input sequence length.
    final_layer_initializer: Initializer for the final dense layer. Defaults
      to a TruncatedNormal initializer.
    hub_module_url: TF-Hub path/url to a BERT module.

  Returns:
    Combined prediction model (words, mask, type) -> (one-hot labels)
    BERT sub-model (words, mask, type) -> (bert_outputs)
  """
  if final_layer_initializer is not None:
    initializer = final_layer_initializer
  else:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  if not hub_module_url:
    # `get_transformer_encoder` is defined alongside this function in the
    # source module.
    bert_encoder = get_transformer_encoder(bert_config, max_seq_length)
    return bert_classifier.BertClassifier(
        bert_encoder,
        num_classes=num_labels,
        dropout_rate=bert_config.hidden_dropout_prob,
        initializer=initializer), bert_encoder

  # TF-Hub branch: wrap the hub module as a Keras layer and add a dropout +
  # dense classification head on its pooled output.
  input_word_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_word_ids')
  input_mask = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_mask')
  input_type_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_type_ids')
  bert_model = hub.KerasLayer(hub_module_url, trainable=True)
  pooled_output, _ = bert_model([input_word_ids, input_mask, input_type_ids])
  output = tf.keras.layers.Dropout(rate=bert_config.hidden_dropout_prob)(
      pooled_output)
  output = tf.keras.layers.Dense(
      num_labels, kernel_initializer=initializer, name='output')(output)
  return tf.keras.Model(
      inputs={
          'input_word_ids': input_word_ids,
          'input_mask': input_mask,
          'input_type_ids': input_type_ids
      },
      outputs=output), bert_model
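# --- Usage sketch (illustrative, not from the source) ------------------------
# A minimal way to drive `classifier_model` above. It assumes a `BertConfig`
# with a `from_json_file` classmethod, as in the Model Garden `configs`
# module; the import path, config file location, label count, and sequence
# length are all hypothetical.
def _example_build_classifier(config_path='/tmp/bert_config.json'):
  from official.nlp.bert import configs  # assumed import path
  config = configs.BertConfig.from_json_file(config_path)
  # With no hub_module_url, the in-repo encoder branch runs and a
  # (BertClassifier, TransformerEncoder) pair is returned.
  model, encoder = classifier_model(config, num_labels=2, max_seq_length=128)
  return model, encoder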
def test_bert_trainer(self):
  """Validate that the Keras object can be created."""
  # Build a transformer network to use within the BERT trainer.
  vocab_size = 100
  sequence_length = 512
  test_network = networks.TransformerEncoder(
      vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)

  # Create a BERT trainer with the created network.
  num_classes = 3
  bert_trainer_model = bert_classifier.BertClassifier(
      test_network, num_classes=num_classes)

  # Create a set of 2-dimensional inputs (the first dimension is implicit).
  word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)
  type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)

  # Invoke the trainer model on the inputs. This causes the layer to be
  # built.
  cls_outs = bert_trainer_model([word_ids, mask, type_ids])

  # Validate that the outputs are of the expected shape.
  expected_classification_shape = [None, num_classes]
  self.assertAllEqual(expected_classification_shape,
                      cls_outs.shape.as_list())
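# --- Usage sketch (illustrative, not from the source) ------------------------
# One way a trainer model like the one built in the test above could be
# compiled and fit on random dummy data. The loss assumes the classifier
# emits logits (the default `output` mode, per the serialization test below,
# which overrides it to 'predictions'); batch size, optimizer, and data
# values are arbitrary. `sequence_length` must match the encoder's configured
# sequence length.
def _example_fit_classifier(bert_trainer_model, vocab_size=100,
                            sequence_length=32, num_classes=3):
  import numpy as np
  batch_size = 8
  word_ids = np.random.randint(
      0, vocab_size, size=(batch_size, sequence_length))
  mask = np.ones((batch_size, sequence_length), dtype='int32')
  type_ids = np.zeros((batch_size, sequence_length), dtype='int32')
  labels = np.random.randint(0, num_classes, size=(batch_size,))
  bert_trainer_model.compile(
      optimizer='adam',
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
  bert_trainer_model.fit([word_ids, mask, type_ids], labels, epochs=1)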
def test_bert_trainer_tensor_call(self):
  """Validate that the Keras object can be invoked."""
  # Build a transformer network to use within the BERT trainer. (Here, we
  # use a short sequence_length for convenience.)
  test_network = networks.TransformerEncoder(
      vocab_size=100, num_layers=2, sequence_length=2)

  # Create a BERT trainer with the created network.
  bert_trainer_model = bert_classifier.BertClassifier(
      test_network, num_classes=2)

  # Create a set of 2-dimensional data tensors to feed into the model.
  word_ids = tf.constant([[1, 1], [2, 2]], dtype=tf.int32)
  mask = tf.constant([[1, 1], [1, 0]], dtype=tf.int32)
  type_ids = tf.constant([[1, 1], [2, 2]], dtype=tf.int32)

  # Invoke the trainer model on the tensors. In Eager mode, this does the
  # actual calculation. (We can't validate the outputs, since the network
  # is too complex: this simply ensures we're not hitting runtime errors.)
  _ = bert_trainer_model([word_ids, mask, type_ids])
def test_serialize_deserialize(self):
  """Validate that the BERT trainer can be serialized and deserialized."""
  # Build a transformer network to use within the BERT trainer. (Here, we
  # use a short sequence_length for convenience.)
  test_network = networks.TransformerEncoder(
      vocab_size=100, num_layers=2, sequence_length=5)

  # Create a BERT trainer with the created network. (Note that all the args
  # are different, so we can catch any serialization mismatches.)
  bert_trainer_model = bert_classifier.BertClassifier(
      test_network, num_classes=4, initializer='zeros', output='predictions')

  # Create another BERT trainer via serialization and deserialization.
  config = bert_trainer_model.get_config()
  new_bert_trainer_model = bert_classifier.BertClassifier.from_config(config)

  # Validate that the config can be forced to JSON.
  _ = new_bert_trainer_model.to_json()

  # If the serialization was successful, the new config should match the
  # old.
  self.assertAllEqual(bert_trainer_model.get_config(),
                      new_bert_trainer_model.get_config())
# Variant of `classifier_model` above that takes an explicit `float_type`
# (tf.float32 or tf.bfloat16) for the final dense layer and builds the
# TransformerEncoder inline rather than via `get_transformer_encoder`.
def classifier_model(bert_config,
                     float_type,
                     num_labels,
                     max_seq_length,
                     final_layer_initializer=None,
                     hub_module_url=None):
  """BERT classifier model in functional API style.

  Construct a Keras model for predicting `num_labels` outputs from an input
  with maximum sequence length `max_seq_length`.

  Args:
    bert_config: BertConfig, the config defines the core BERT model.
    float_type: dtype, tf.float32 or tf.bfloat16.
    num_labels: integer, the number of classes.
    max_seq_length: integer, the maximum input sequence length.
    final_layer_initializer: Initializer for the final dense layer. Defaults
      to a TruncatedNormal initializer.
    hub_module_url: TF-Hub path/url to a BERT module.

  Returns:
    Combined prediction model (words, mask, type) -> (one-hot labels)
    BERT sub-model (words, mask, type) -> (bert_outputs)
  """
  if final_layer_initializer is not None:
    initializer = final_layer_initializer
  else:
    initializer = tf.keras.initializers.TruncatedNormal(
        stddev=bert_config.initializer_range)

  if not hub_module_url:
    bert_encoder = networks.TransformerEncoder(
        vocab_size=bert_config.vocab_size,
        hidden_size=bert_config.hidden_size,
        num_layers=bert_config.num_hidden_layers,
        num_attention_heads=bert_config.num_attention_heads,
        intermediate_size=bert_config.intermediate_size,
        activation=tf_utils.get_activation('gelu'),
        dropout_rate=bert_config.hidden_dropout_prob,
        attention_dropout_rate=bert_config.attention_probs_dropout_prob,
        sequence_length=max_seq_length,
        max_sequence_length=bert_config.max_position_embeddings,
        type_vocab_size=bert_config.type_vocab_size,
        initializer=tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range))
    return bert_classifier.BertClassifier(
        bert_encoder,
        num_classes=num_labels,
        dropout_rate=bert_config.hidden_dropout_prob,
        initializer=initializer), bert_encoder

  input_word_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_word_ids')
  input_mask = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_mask')
  input_type_ids = tf.keras.layers.Input(
      shape=(max_seq_length,), dtype=tf.int32, name='input_type_ids')
  bert_model = hub.KerasLayer(hub_module_url, trainable=True)
  pooled_output, _ = bert_model([input_word_ids, input_mask, input_type_ids])
  output = tf.keras.layers.Dropout(rate=bert_config.hidden_dropout_prob)(
      pooled_output)
  output = tf.keras.layers.Dense(
      num_labels,
      kernel_initializer=initializer,
      name='output',
      dtype=float_type)(output)
  return tf.keras.Model(
      inputs={
          'input_word_ids': input_word_ids,
          'input_mask': input_mask,
          'input_type_ids': input_type_ids
      },
      outputs=output), bert_model
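# --- Usage sketch (illustrative, not from the source) ------------------------
# Exercises the TF-Hub branch of the variant above. The hub handle shown is a
# typical BERT-Base module on tfhub.dev but is an assumption here; any module
# with the same (pooled_output, sequence_output) interface works. Dtype, label
# count, and sequence length are arbitrary.
def _example_build_hub_classifier(config):
  model, hub_layer = classifier_model(
      config,
      float_type=tf.float32,
      num_labels=3,
      max_seq_length=64,
      hub_module_url=(
          'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1'))
  return model, hub_layer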