Example 1
    def build_small_model(self, model_cfg):
        """Builds a small TransformerEncoder plus BERT pretraining heads."""
        encoder_cfg = model_cfg['encoder']['bert']
        dataconf = self.task_config.train_data
        encoder_network = small_encoder_lib.TransformerEncoder(
            vocab_size=encoder_cfg['vocab_size'],
            hidden_size=encoder_cfg['hidden_size'],
            num_layers=encoder_cfg['num_layers'],
            num_attention_heads=encoder_cfg['num_attention_heads'],
            intermediate_size=encoder_cfg['intermediate_size'],
            activation=tf_utils.get_activation(
                encoder_cfg['hidden_activation']),
            dropout_rate=encoder_cfg['dropout_rate'],
            attention_dropout_rate=encoder_cfg['attention_dropout_rate'],
            max_sequence_length=encoder_cfg['max_position_embeddings'],
            type_vocab_size=encoder_cfg['type_vocab_size'],
            initializer=tf.keras.initializers.TruncatedNormal(
                stddev=encoder_cfg['initializer_range']),
            net2net_ratio=encoder_cfg['net2net_ratio'],
            net2net_layers=encoder_cfg['net2net_layers'],
            lightatt_layers=encoder_cfg['lightatt_layers'],
            input_pool_name=encoder_cfg['input_pool_name'],
            input_pool_size=encoder_cfg['input_pool_size'])
        sequence_length = dataconf.seq_length
        predict_length = dataconf.max_predictions_per_seq
        # Run zero-filled dummy inputs through the encoder once so that all
        # of its variables are created before the pretrainer wraps it.
        dummy_inputs = {
            'input_mask': tf.zeros((1, sequence_length), dtype=tf.int32),
            'input_positions': tf.zeros((1, sequence_length), dtype=tf.int32),
            'input_type_ids': tf.zeros((1, sequence_length), dtype=tf.int32),
            'input_word_ids': tf.zeros((1, sequence_length), dtype=tf.int32),
            'masked_lm_positions': tf.zeros((1, predict_length), dtype=tf.int32),
            'masked_input_ids': tf.zeros((1, predict_length), dtype=tf.int32),
            'masked_segment_ids': tf.zeros((1, predict_length), dtype=tf.int32),
            'masked_lm_weights': tf.zeros((1, predict_length), dtype=tf.float32),
        }
        _ = encoder_network(dummy_inputs)

        # Classification heads are optional and only built when the config
        # provides them.
        if 'cls_heads' in model_cfg:
            classification_heads = [
                layers.ClassificationHead(**cfg)
                for cfg in model_cfg['cls_heads']
            ]
        else:
            classification_heads = []
        # Wrap the encoder with the masked-LM head and any classification heads.
        model = small_pretrainer.BertPretrainModel(
            mlm_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=encoder_cfg['initializer_range']),
            mlm_activation=tf_utils.get_activation(
                encoder_cfg['hidden_activation']),
            encoder_network=encoder_network,
            classification_heads=classification_heads)
        _ = model(dummy_inputs)
        return model
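
For context, `build_small_model` expects a nested config with an `encoder.bert` section. The sketch below shows the shape of that dictionary: the key names mirror the lookups in the method above, but every value is a placeholder rather than a published configuration, and the `cls_heads` entry assumes the keyword arguments of `layers.ClassificationHead`. The `task` object in the last line is hypothetical.

# Hypothetical config sketch; keys mirror the lookups in build_small_model.
model_cfg = {
    'encoder': {
        'bert': {
            'vocab_size': 30522,
            'hidden_size': 512,
            'num_layers': 4,
            'num_attention_heads': 8,
            'intermediate_size': 2048,
            'hidden_activation': 'gelu',
            'dropout_rate': 0.1,
            'attention_dropout_rate': 0.1,
            'max_position_embeddings': 512,
            'type_vocab_size': 2,
            'initializer_range': 0.02,
            'net2net_ratio': 0.25,     # placeholder value
            'net2net_layers': 2,       # placeholder value
            'lightatt_layers': 2,      # placeholder value
            'input_pool_name': 'max',  # placeholder value
            'input_pool_size': 2,      # placeholder value
        }
    },
    # Optional; each dict is passed to layers.ClassificationHead(**cfg).
    'cls_heads': [{'inner_dim': 512, 'num_classes': 2, 'name': 'next_sentence'}],
}
model = task.build_small_model(model_cfg)  # `task` is the enclosing task object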
Example 2
    def test_bert_pretrainer(self):
        """Validate that the Keras object can be created."""
        # Build a transformer network to use within the BERT trainer.
        vocab_size = 100
        sequence_length = 512
        test_network = transformer_encoder.TransformerEncoder(
            vocab_size=vocab_size,
            num_layers=2,
            max_sequence_length=sequence_length)

        # Create a set of zero-filled dummy inputs with an explicit batch size of 1.
        predict_length = 2
        dummy_inputs = {
            'input_mask': tf.zeros((1, sequence_length), dtype=tf.int32),
            'input_positions': tf.zeros((1, sequence_length), dtype=tf.int32),
            'input_type_ids': tf.zeros((1, sequence_length), dtype=tf.int32),
            'input_word_ids': tf.zeros((1, sequence_length), dtype=tf.int32),
            'masked_lm_positions': tf.zeros((1, predict_length), dtype=tf.int32),
            'masked_input_ids': tf.zeros((1, predict_length), dtype=tf.int32),
            'masked_segment_ids': tf.zeros((1, predict_length), dtype=tf.int32),
            'masked_lm_weights': tf.zeros((1, predict_length), dtype=tf.float32),
        }
        _ = test_network(dummy_inputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_pretrain_model.BertPretrainModel(
            test_network)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        outputs = bert_trainer_model(dummy_inputs)

        # Validate that the outputs are of the expected shape.
        expected_lm_shape = [1, predict_length, vocab_size]
        self.assertAllEqual(expected_lm_shape,
                            outputs['mlm_logits'].shape.as_list())
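
Both examples hand-build the same zero-filled input dictionary. As a minimal refactoring sketch (the helper name is ours, not part of either snippet), the repeated pattern can be factored out:

import tensorflow as tf

def make_dummy_inputs(sequence_length, predict_length, batch_size=1):
    """Zero-filled inputs with the keys both examples feed to the model."""
    def zeros(length, dtype=tf.int32):
        return tf.zeros((batch_size, length), dtype=dtype)
    return {
        'input_mask': zeros(sequence_length),
        'input_positions': zeros(sequence_length),
        'input_type_ids': zeros(sequence_length),
        'input_word_ids': zeros(sequence_length),
        'masked_lm_positions': zeros(predict_length),
        'masked_input_ids': zeros(predict_length),
        'masked_segment_ids': zeros(predict_length),
        'masked_lm_weights': zeros(predict_length, dtype=tf.float32),
    }

# Usage equivalent to the dictionaries built inline above:
dummy_inputs = make_dummy_inputs(sequence_length=512, predict_length=2)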