Example #1
def test_unilm_bert_attention_mask(type_ids, sequence_mask) -> None:
    mask = np.array([
        [
            [1, 1, 1, 0, 0, 0],
            [1, 1, 1, 0, 0, 0],
            [1, 1, 1, 0, 0, 0],
            [1, 1, 1, 1, 0, 0],
            [1, 1, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 1],
        ],
        [
            [1, 1, 0, 0, 0, 0],
            [1, 1, 0, 0, 0, 0],
            [1, 1, 1, 0, 0, 0],
            [1, 1, 1, 1, 0, 0],
            [1, 1, 1, 1, 1, 0],
            [0, 0, 0, 0, 0, 0],
        ],
        [
            [1, 1, 0, 0, 0, 0],
            [1, 1, 0, 0, 0, 0],
            [1, 1, 1, 0, 0, 0],
            [1, 1, 1, 1, 0, 0],
            [0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0],
        ],
    ])
    np.testing.assert_array_almost_equal(
        BertAttentionMaskLayer(mask_mode="unilm")([type_ids, sequence_mask]),
        mask)
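The unilm mask that this test expects can be reproduced in a few lines of NumPy. A minimal sketch, assuming the usual UniLM seq2seq convention (segment-0 tokens are visible to every position, segment-1 tokens are visible only causally, and padded rows and columns are fully masked); the function name unilm_attention_mask is hypothetical, not part of the library:

import numpy as np

def unilm_attention_mask(type_ids, sequence_mask):
    # type_ids: (batch, length) segment ids; sequence_mask: (batch, length),
    # 1 for real tokens and 0 for padding.
    type_ids = np.asarray(type_ids)
    valid = np.asarray(sequence_mask).astype(int)
    length = type_ids.shape[-1]
    idx = np.arange(length)
    causal = (idx[None, :] <= idx[:, None]).astype(int)  # (length, length)
    # Segment-0 columns are always visible; segment-1 columns only causally.
    visible = np.where(type_ids[:, None, :] == 0, 1, causal[None])
    # Zero out rows and columns that correspond to padding.
    return visible * valid[:, None, :] * valid[:, :, None]

type_ids = np.array([[0, 0, 0, 1, 1, 1]])
sequence_mask = np.array([[1, 1, 1, 1, 1, 1]])
unilm_attention_mask(type_ids, sequence_mask)  # matches the first block above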
Example #2
def export(
    self,
    directory: str,
    name: str,
    version: str = "0",
    only_output_cls: bool = False,
) -> None:
    # Build a string-in, embedding-out inference graph: tokenization happens
    # in-graph, so the exported model needs no external preprocessing.
    self.text_input = tf.keras.Input(shape=(),
                                     dtype=tf.string,
                                     name="text_input")
    preprocessing_layer = BertPreprocessingLayer(self.vocab.sorted_tokens)
    attention_mask_layer = BertAttentionMaskLayer()
    token_ids, type_ids = preprocessing_layer(self.text_input)
    mask = tf.not_equal(token_ids, 0)
    attention_mask = attention_mask_layer([type_ids, mask])
    outputs = self.pretrain_layer(
        [token_ids, type_ids, attention_mask,
         tf.ones_like(token_ids)])
    if only_output_cls:
        self._inference_model = tf.keras.Model(
            inputs=self.text_input,
            outputs=outputs[0],
        )
    else:
        self._inference_model = tf.keras.Model(
            inputs=self.text_input,
            outputs=[outputs[0], outputs[1][-1]],
        )
    super().export(directory, name, version)
    # Ship the vocabulary alongside the exported model.
    export_dir = os.path.join(directory, name, version)
    self.save_vocab(export_dir)
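Once exported, the inference model maps raw strings straight to embeddings, since preprocessing is baked into the graph. A hedged usage sketch, assuming the SavedModel written by export above loads back with stock Keras; the "models"/"my_bert" path components are placeholders:

import os
import tensorflow as tf

# Same directory/name/version that were passed to export() above.
model = tf.keras.models.load_model(os.path.join("models", "my_bert", "0"))
# With only_output_cls=True the model returns just the [CLS] vector.
cls_vectors = model(tf.constant(["first sentence", "second sentence"]))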
Example #3
def build_encoding_layer(self, inputs: list[tf.Tensor]) -> list[tf.Tensor]:
    token_ids, type_ids, logits_mask = inputs
    mask = tf.math.not_equal(token_ids, 0)
    return self.pretrain_layer([
        token_ids,
        type_ids,
        BertAttentionMaskLayer()([type_ids, mask]),
        logits_mask,
    ])
Example #4
def build_encoding_layer(self, inputs: list[tf.Tensor]) -> list[tf.Tensor]:
    token_ids, type_ids = inputs
    mask = tf.math.not_equal(token_ids, 0)
    return [
        *self.text2vec([
            token_ids, type_ids,
            BertAttentionMaskLayer()([type_ids, mask])
        ]),
        mask,
    ]
Example #5
def build_encoding_layer(self, inputs: list[tf.Tensor]) -> list[tf.Tensor]:
    token_ids, type_ids = inputs
    mask = tf.math.not_equal(token_ids, 0)

    # Shift type_ids left so each position flags whether it has to predict
    # a segment-1 (target) token.
    logits_mask = tf.roll(type_ids, -1, 1)
    logits = self.text2vec(
        [
            token_ids,
            type_ids,
            BertAttentionMaskLayer(mask_mode="unilm")([type_ids, mask]),
        ],
        logits_mask=logits_mask,
    )[2]
    return [logits, token_ids, type_ids]
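A quick check of what that tf.roll produces:

import tensorflow as tf

type_ids = tf.constant([[0, 0, 0, 1, 1, 1]])
# Position i receives type_ids[i + 1]; the last position wraps around to
# index 0, which is segment 0 and therefore harmless here.
print(tf.roll(type_ids, -1, 1))  # tf.Tensor([[0 0 1 1 1 0]], ...)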
Example #6
def build_encoding_layer(self, inputs: list[tf.Tensor]) -> list[tf.Tensor]:
    if self.output_format == "bio":
        token_ids, type_ids, tag_ids = inputs
    else:
        token_ids, type_ids = inputs
    mask_layer = tf.keras.layers.Lambda(
        lambda ids: tf.not_equal(ids, 0), name="mask_layer"
    )
    mask = mask_layer(token_ids)

    outputs = [
        self.text2vec(
            [token_ids, type_ids, BertAttentionMaskLayer()([type_ids, mask])]
        )[1],
        mask,
    ]
    if self.output_format == "bio":
        outputs.append(tag_ids)
    return outputs
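Unlike the earlier examples, which call tf.not_equal directly, this one wraps the mask computation in a named Lambda layer, so the mask becomes an addressable node in the Keras graph. A minimal standalone sketch of that effect:

import tensorflow as tf

token_ids = tf.keras.Input(shape=(None,), dtype=tf.int64, name="token_ids")
mask_layer = tf.keras.layers.Lambda(
    lambda ids: tf.not_equal(ids, 0), name="mask_layer")
model = tf.keras.Model(token_ids, mask_layer(token_ids))
assert model.get_layer("mask_layer") is mask_layer  # retrievable by name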
Example #7
def build(self, input_shape: tf.TensorShape) -> None:
    self.bert_layer = BertLayer.from_config(self.bert_config)
    self.attention_mask_layer = BertAttentionMaskLayer()
    super().build(input_shape)
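Creating the sub-layers inside build rather than __init__ defers their construction until the layer is first used. A hedged sketch of how such a layer might wire them together in call, assuming BertLayer accepts [token_ids, type_ids, attention_mask] as in the earlier examples; the class name and call body are illustrative, not taken from the library:

import tensorflow as tf

class BertEncoding(tf.keras.layers.Layer):  # hypothetical wrapper
    def __init__(self, bert_config, **kwargs):
        super().__init__(**kwargs)
        self.bert_config = bert_config

    def build(self, input_shape):
        self.bert_layer = BertLayer.from_config(self.bert_config)
        self.attention_mask_layer = BertAttentionMaskLayer()
        super().build(input_shape)

    def call(self, inputs):
        token_ids, type_ids = inputs
        mask = tf.not_equal(token_ids, 0)
        attention_mask = self.attention_mask_layer([type_ids, mask])
        return self.bert_layer([token_ids, type_ids, attention_mask])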