def build_small_model(self, model_cfg):
  """Builds the small BERT pretraining model described by `model_cfg`."""
  encoder_cfg = model_cfg['encoder']['bert']
  dataconf = self.task_config.train_data

  # Construct the small transformer encoder from the encoder config.
  encoder_network = small_encoder_lib.TransformerEncoder(
      vocab_size=encoder_cfg['vocab_size'],
      hidden_size=encoder_cfg['hidden_size'],
      num_layers=encoder_cfg['num_layers'],
      num_attention_heads=encoder_cfg['num_attention_heads'],
      intermediate_size=encoder_cfg['intermediate_size'],
      activation=tf_utils.get_activation(encoder_cfg['hidden_activation']),
      dropout_rate=encoder_cfg['dropout_rate'],
      attention_dropout_rate=encoder_cfg['attention_dropout_rate'],
      max_sequence_length=encoder_cfg['max_position_embeddings'],
      type_vocab_size=encoder_cfg['type_vocab_size'],
      initializer=tf.keras.initializers.TruncatedNormal(
          stddev=encoder_cfg['initializer_range']),
      net2net_ratio=encoder_cfg['net2net_ratio'],
      net2net_layers=encoder_cfg['net2net_layers'],
      lightatt_layers=encoder_cfg['lightatt_layers'],
      input_pool_name=encoder_cfg['input_pool_name'],
      input_pool_size=encoder_cfg['input_pool_size'])

  # Build the encoder once on dummy inputs so all weights are created with the
  # training sequence and masked-prediction lengths.
  sequence_length = dataconf.seq_length
  predict_length = dataconf.max_predictions_per_seq
  dummy_inputs = dict(
      input_mask=tf.zeros((1, sequence_length), dtype=tf.int32),
      input_positions=tf.zeros((1, sequence_length), dtype=tf.int32),
      input_type_ids=tf.zeros((1, sequence_length), dtype=tf.int32),
      input_word_ids=tf.zeros((1, sequence_length), dtype=tf.int32),
      masked_lm_positions=tf.zeros((1, predict_length), dtype=tf.int32),
      masked_input_ids=tf.zeros((1, predict_length), dtype=tf.int32),
      masked_segment_ids=tf.zeros((1, predict_length), dtype=tf.int32),
      masked_lm_weights=tf.zeros((1, predict_length), dtype=tf.float32))
  _ = encoder_network(dummy_inputs)

  # Optional classification heads (e.g. next-sentence prediction).
  if 'cls_heads' in model_cfg:
    classification_heads = [
        layers.ClassificationHead(**cfg) for cfg in model_cfg['cls_heads']
    ]
  else:
    classification_heads = []

  # Wrap the encoder and heads in the pretraining model and build it.
  model = small_pretrainer.BertPretrainModel(
      mlm_initializer=tf.keras.initializers.TruncatedNormal(
          stddev=encoder_cfg['initializer_range']),
      mlm_activation=tf_utils.get_activation(encoder_cfg['hidden_activation']),
      encoder_network=encoder_network,
      classification_heads=classification_heads)
  _ = model(dummy_inputs)
  return model
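# A minimal sketch of the `model_cfg` dictionary that `build_small_model`
# reads above. The key names come directly from the lookups in the method;
# the concrete values are illustrative assumptions, not the project's actual
# defaults.
example_model_cfg = {
    'encoder': {
        'bert': {
            'vocab_size': 30522,
            'hidden_size': 256,
            'num_layers': 4,
            'num_attention_heads': 4,
            'intermediate_size': 1024,
            'hidden_activation': 'gelu',
            'dropout_rate': 0.1,
            'attention_dropout_rate': 0.1,
            'max_position_embeddings': 512,
            'type_vocab_size': 2,
            'initializer_range': 0.02,
            'net2net_ratio': None,   # assumed: net2net disabled
            'net2net_layers': None,  # assumed: net2net disabled
            'lightatt_layers': None,  # assumed: no light-attention layers
            'input_pool_name': None,  # assumed: no input pooling
            'input_pool_size': None,
        }
    },
    # Optional: each entry is forwarded as kwargs to layers.ClassificationHead,
    # e.g. dict(inner_dim=256, num_classes=2, name='next_sentence').
    # 'cls_heads': [...],
}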
def test_bert_pretrainer(self):
  """Validate that the Keras object can be created."""
  # Build a transformer network to use within the BERT trainer.
  vocab_size = 100
  sequence_length = 512
  test_network = transformer_encoder.TransformerEncoder(
      vocab_size=vocab_size,
      num_layers=2,
      max_sequence_length=sequence_length)

  # Create a set of 2-dimensional inputs (the first dimension is implicit).
  predict_length = 2
  dummy_inputs = dict(
      input_mask=tf.zeros((1, sequence_length), dtype=tf.int32),
      input_positions=tf.zeros((1, sequence_length), dtype=tf.int32),
      input_type_ids=tf.zeros((1, sequence_length), dtype=tf.int32),
      input_word_ids=tf.zeros((1, sequence_length), dtype=tf.int32),
      masked_lm_positions=tf.zeros((1, predict_length), dtype=tf.int32),
      masked_input_ids=tf.zeros((1, predict_length), dtype=tf.int32),
      masked_segment_ids=tf.zeros((1, predict_length), dtype=tf.int32),
      masked_lm_weights=tf.zeros((1, predict_length), dtype=tf.float32))
  _ = test_network(dummy_inputs)

  # Create a BERT trainer with the created network.
  bert_trainer_model = bert_pretrain_model.BertPretrainModel(test_network)

  # Invoke the trainer model on the inputs. This causes the layer to be built.
  outputs = bert_trainer_model(dummy_inputs)

  # Validate that the outputs are of the expected shape.
  expected_lm_shape = [1, predict_length, vocab_size]
  self.assertAllEqual(expected_lm_shape,
                      outputs['mlm_logits'].shape.as_list())