Example 1
    def build(self):
        """ALBert模型构建函数"""
        input_ids = tf.keras.layers.Input(shape=(self.max_seq_len, ),
                                          name='Input-Token')
        model_inputs = [input_ids]
        if self.use_token_type:
            token_type_ids = tf.keras.layers.Input(shape=(self.max_seq_len, ),
                                                   name='Input-Segment')
            model_inputs.append(token_type_ids)
        else:
            token_type_ids = tf.keras.layers.Lambda(
                lambda x: create_token_type_ids(x),
                name='Input-Segment')(input_ids)

        # attention_mask is the same thing as input_mask in the original BERT code
        attention_mask = tf.keras.layers.Lambda(
            lambda x: get_input_mask(x), name="Attention-Mask")(input_ids)
        embeddings = self._embeddings(input_ids, token_type_ids)

        # Embedding factorization: project embedding_size up to hidden_size
        if self.embedding_size != self.hidden_size:
            embeddings = tf.keras.layers.Dense(
                self.hidden_size,
                kernel_initializer=get_initializer(self.initializer_range),
                name="Factor-Dense")(embeddings)

        # Main Transformer encoder stack
        self.all_layer_outputs = []

        prev_output = embeddings
        attention_name = 'Encoder-MultiHeadSelfAttention'
        feed_forward_name = 'Encoder-FeedForward'
        layers = self.main_layer(attention_name, feed_forward_name)

        for i in range(self.num_hidden_layers):
            encoder_output = self.transformer_block(
                inputs=prev_output,
                attention_mask=attention_mask,
                # Layer reuse: the TensorFlow implementation shares variables via
                # variable_scope; in Keras, reusing the same layer objects is enough.
                layers=layers)
            self.all_layer_outputs.append(encoder_output)
            prev_output = encoder_output

        # Pooler: apply a linear transform to the [CLS] output, used for sentence or sentence-pair classification
        sequence_output = self.all_layer_outputs[-1]
        first_token_tensor = tf.keras.layers.Lambda(
            lambda x: x[:, 0], name='Pooler')(sequence_output)
        self.pooler_output = tf.keras.layers.Dense(
            self.hidden_size,
            activation='tanh',
            kernel_initializer=get_initializer(self.initializer_range),
            name="Pooler-Dense")(first_token_tensor)

        # sequence_output, pooler_output
        outputs = [sequence_output, self.pooler_output]

        self.model = tf.keras.Model(model_inputs, outputs)
        for layer in self.model.layers:
            layer.trainable = self._trainable(layer)
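
The defining ALBERT detail above is that `main_layer` is called once, outside the loop, and the same `layers` bundle feeds every `transformer_block` call, so all encoder blocks share one set of weights. A minimal standalone sketch of that reuse mechanism in plain Keras (hypothetical layer names, not this repository's API):

    import tensorflow as tf

    # Calling one layer object N times shares its variables across all N
    # "blocks", unlike constructing N fresh layers as BERT does.
    shared_block = tf.keras.layers.Dense(64, activation='relu', name='Shared-Block')

    inputs = tf.keras.layers.Input(shape=(64, ))
    x = inputs
    for _ in range(3):       # three applications, one set of weights
        x = shared_block(x)
    model = tf.keras.Model(inputs, x)
    print(len(model.trainable_weights))  # 2: a single kernel/bias pair
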
Example 2
    def __init__(self,
                 config,
                 trainable=True,
                 training=True,
                 max_seq_len=None,
                 **kwargs):
        super(ALBertForPretraining, self).__init__(config, trainable, training,
                                                   max_seq_len, **kwargs)
        self.bert = ALBertModel(config,
                                trainable=trainable,
                                training=training,
                                max_seq_len=max_seq_len,
                                **kwargs)
        self.input_embeddings = self.bert.get_token_embeddings()

        # NSP
        self.seq_relationship = tf.keras.layers.Dense(
            2,
            kernel_initializer=get_initializer(config.initializer_range),
            name='NSP')
        # MLM
        self.mlm_dense = tf.keras.layers.Dense(
            config.embedding_size,
            kernel_initializer=get_initializer(config.initializer_range),
            name='MLM-Dense')
        self.transform_act_fn = ACT2FN[config.hidden_act]
        self.LayerNorm = LayerNormalization(epsilon=config.layer_norm_eps,
                                            name='MLM-Norm')
        self.bias_add = BiasAdd(initializer_range=config.initializer_range,
                                name='MLM-Proba')
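
Only the head layers are defined here; a plausible forward pass (a sketch, not the repository's actual `call`, and assuming `input_embeddings` is the shared token-embedding weight matrix returned by `get_token_embeddings`) would chain them and tie the output projection to the embeddings:

    # Hypothetical sketch; names mirror the attributes defined above.
    def mlm_logits(self, sequence_output):
        h = self.mlm_dense(sequence_output)   # hidden_size -> embedding_size
        h = self.transform_act_fn(h)          # e.g. gelu
        h = self.LayerNorm(h)
        # Weight tying: score against the shared token-embedding matrix.
        logits = tf.matmul(h, self.input_embeddings, transpose_b=True)
        return self.bias_add(logits)          # per-vocabulary-token bias
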
Example 3
    def __init__(self,
                 config,
                 trainable=True,
                 training=False,
                 max_seq_len=None,
                 **kwargs):
        super(DistillBertForSequenceClassification,
              self).__init__(config, trainable, training, max_seq_len,
                             **kwargs)
        self.bert = DistillBertModel(config,
                                     trainable=trainable,
                                     training=training,
                                     max_seq_len=max_seq_len,
                                     **kwargs)
        num_labels = int(kwargs.pop('num_labels', 2))

        self.pre_classifier = tf.keras.layers.Dense(
            config.hidden_size,
            kernel_initializer=get_initializer(config.initializer_range),
            activation='relu',
            name="pre_classifier")
        self.dropout = tf.keras.layers.Dropout(
            rate=config.sequence_classif_dropout_prob, name='classifier-drop')
        self.classifier = tf.keras.layers.Dense(
            units=num_labels,
            activation='softmax',
            kernel_regularizer=tf.keras.regularizers.l2(0.01),
            kernel_initializer=get_initializer(config.initializer_range),
            name="classifier")
Example 4
    def _embeddings(self, input_ids, position_ids=None):
        """Token + position embeddings (no segment embeddings), then LayerNorm and dropout."""
        self.share_token_embeddings = tf.keras.layers.Embedding(
            input_dim=self.vocab_size,
            output_dim=self.embedding_size,
            embeddings_initializer=get_initializer(self.initializer_range),
            name='Embedding-Token')
        self.token_embeddings = self.share_token_embeddings(input_ids)
        if position_ids is None:
            position_ids = tf.keras.layers.Lambda(
                lambda x: create_position_ids(x),
                name='Input-Position')(input_ids)

        position_embeddings = tf.keras.layers.Embedding(
            input_dim=self.max_position_embeddings,
            output_dim=self.embedding_size,
            embeddings_initializer=get_initializer(self.initializer_range),
            name='Embedding-Position')(position_ids)

        embeddings = tf.keras.layers.Add(name='Embedding-Add')(
            [self.token_embeddings, position_embeddings])

        embeddings = LayerNormalization(epsilon=self.layer_norm_eps,
                                        name='Embedding-Norm')(embeddings)
        embeddings = tf.keras.layers.Dropout(
            rate=self.hidden_dropout_prob,
            name='Embedding-Dropout')(embeddings)

        return embeddings
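
`create_position_ids` is defined elsewhere in the repository; a minimal sketch of what such a helper typically does (an assumption, not the project's code):

    import tensorflow as tf

    def create_position_ids(input_ids):
        # One position index per token, broadcast over the batch dimension.
        seq_len = tf.shape(input_ids)[1]
        position_ids = tf.range(seq_len, dtype=tf.int32)[tf.newaxis, :]
        return tf.broadcast_to(position_ids, tf.shape(input_ids))
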
Example 5
    def build(self, input_shape):
        super(FeedForward, self).build(input_shape)
        # The activation is only applied to the "intermediate" hidden layer.
        self.intermediate = tf.keras.layers.Dense(
            self.intermediate_size,
            kernel_initializer=get_initializer(self.initializer_range))

        self.intermediate_act_fn = ACT2FN[self.hidden_act]

        # Down-project back to `hidden_size` then add the residual.
        self.down_project = tf.keras.layers.Dense(
            self.hidden_size,
            kernel_initializer=get_initializer(self.initializer_range))
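
A hypothetical `call` matching the layers built above, i.e. the standard position-wise feed-forward block (whether the residual add and LayerNorm live here or in the caller is an assumption; the comments suggest the caller):

    # Sketch of the forward pass these layers imply.
    def call(self, hidden_states):
        h = self.intermediate(hidden_states)   # hidden_size -> intermediate_size
        h = self.intermediate_act_fn(h)        # activation on the inner layer only
        return self.down_project(h)            # back to hidden_size; residual added by caller
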
Example 6
    def __init__(self,
                 config,
                 trainable=True,
                 training=True,
                 max_seq_len=None,
                 **kwargs):
        super(DistillBertForPretraining,
              self).__init__(config, trainable, training, max_seq_len,
                             **kwargs)
        self.distillbert = DistillBertModel(config,
                                            trainable=trainable,
                                            training=training,
                                            max_seq_len=max_seq_len,
                                            **kwargs)

        # MLM
        self.transform = tf.keras.layers.Dense(
            config.hidden_size,
            kernel_initializer=get_initializer(config.initializer_range),
            name='MLM-Dense')
        self.transform_act_fn = ACT2FN[config.hidden_act]
        self.LayerNorm = LayerNormalization(epsilon=config.layer_norm_eps,
                                            name='MLM-Norm')
        self.bias_add = BiasAdd(initializer_range=config.initializer_range,
                                name='MLM-Proba')
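
`ACT2FN` maps the `hidden_act` config string to an activation callable; a plausible minimal version (an assumption, the repository's map may differ):

    import tensorflow as tf

    ACT2FN = {
        'gelu': tf.keras.activations.gelu,  # available in TF >= 2.4
        'relu': tf.nn.relu,
        'tanh': tf.math.tanh,
    }
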
Example 7
    def build(self, input_shape):
        super(ALBertMultiHeadSelfAttention, self).build(input_shape)
        self.query = tf.keras.layers.Dense(self.all_head_size,
                                           kernel_initializer=get_initializer(
                                               self.initializer_range),
                                           name='query')
        self.key = tf.keras.layers.Dense(self.all_head_size,
                                         kernel_initializer=get_initializer(
                                             self.initializer_range),
                                         name='key')
        self.value = tf.keras.layers.Dense(self.all_head_size,
                                           kernel_initializer=get_initializer(
                                               self.initializer_range),
                                           name='value')

        self.dropout = tf.keras.layers.Dropout(
            rate=self.attention_probs_dropout_prob)

        self.linear = tf.keras.layers.Dense(self.all_head_size,
                                            kernel_initializer=get_initializer(
                                                self.initializer_range),
                                            name='linear')
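
A condensed sketch of the scaled dot-product attention these layers compute (single-head view; the real layer also splits and re-merges `num_attention_heads`, and the mask is assumed to be `(batch, seq_len)` with 1.0 for real tokens):

    # Hypothetical single-head sketch, not the repository's call().
    def attention(self, hidden_states, attention_mask):
        q = self.query(hidden_states)                 # (batch, seq, all_head_size)
        k = self.key(hidden_states)
        v = self.value(hidden_states)
        scores = tf.matmul(q, k, transpose_b=True)    # (batch, seq, seq)
        scores /= tf.math.sqrt(tf.cast(tf.shape(q)[-1], scores.dtype))
        scores += (1.0 - attention_mask[:, tf.newaxis, :]) * -1e9  # hide padding
        probs = self.dropout(tf.nn.softmax(scores, axis=-1))
        return self.linear(tf.matmul(probs, v))       # output projection
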
Example 8
    def build(self):
        """Bert模型构建函数"""
        # 设置输入
        input_ids = tf.keras.layers.Input(shape=(self.max_seq_len, ),
                                          name='Input-Token')
        model_inputs = [input_ids]
        if self.use_token_type:
            token_type_ids = tf.keras.layers.Input(shape=(self.max_seq_len, ),
                                                   name='Input-Segment')
            model_inputs.append(token_type_ids)
        else:
            token_type_ids = tf.keras.layers.Lambda(
                lambda x: create_token_type_ids(x),
                name='Input-Segment')(input_ids)

        embeddings = self._embeddings(input_ids, token_type_ids)

        # Main Transformer encoder stack
        attention_mask = tf.keras.layers.Lambda(
            lambda x: get_input_mask(x), name="Attention-Mask")(input_ids)
        self.all_layer_outputs = []

        prev_output = embeddings
        for i in range(self.num_hidden_layers):
            attention_name = 'Encoder-%d-MultiHeadSelfAttention' % (i + 1)
            feed_forward_name = 'Encoder-%d-FeedForward' % (i + 1)
            encoder_output = self.transformer_block(
                inputs=prev_output,
                attention_mask=attention_mask,
                attention_name=attention_name,
                feed_forward_name=feed_forward_name)
            self.all_layer_outputs.append(encoder_output)
            prev_output = encoder_output

        # Pooler: apply a linear transform to the [CLS] output, used for sentence or sentence-pair classification
        sequence_output = self.all_layer_outputs[-1]
        first_token_tensor = tf.keras.layers.Lambda(
            lambda x: x[:, 0], name='Pooler')(sequence_output)
        self.pooler_output = tf.keras.layers.Dense(
            self.hidden_size,
            activation='tanh',
            kernel_initializer=get_initializer(self.initializer_range),
            name="Pooler-Dense")(first_token_tensor)

        # sequence_output, pooler_output
        outputs = [sequence_output, self.pooler_output]

        self.model = tf.keras.Model(model_inputs, outputs)
        for layer in self.model.layers:
            layer.trainable = self._trainable(layer)
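
In contrast to Example 1, each BERT encoder layer here gets its own uniquely named attention and feed-forward blocks, so no weights are shared. The `get_input_mask` helper used for `Attention-Mask` in both build functions is defined elsewhere; a plausible sketch, assuming pad id 0:

    import tensorflow as tf

    def get_input_mask(input_ids):
        # 1.0 for real tokens, 0.0 for padding (pad id 0 is an assumption).
        return tf.cast(tf.not_equal(input_ids, 0), tf.float32)
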
Example 9
    def __init__(self,
                 config,
                 trainable=True,
                 training=False,
                 max_seq_len=None,
                 **kwargs):
        super(ALBertForQuestionAnswering,
              self).__init__(config, trainable, training, max_seq_len,
                             **kwargs)
        self.bert = ALBertModel(config,
                                trainable=trainable,
                                training=training,
                                max_seq_len=max_seq_len,
                                **kwargs)
        num_labels = int(kwargs.pop('num_labels', 2))
        self.qa_outputs = tf.keras.layers.Dense(
            units=num_labels,
            activation='softmax',
            kernel_regularizer=tf.keras.regularizers.l2(0.001),
            kernel_initializer=get_initializer(config.initializer_range),
            name="qa_outputs")