def test_encoder_decoder_save_load_from_encoder_decoder(self):
        config = self.get_encoder_decoder_config_small()

        # Create two randomly initialized BERT models for bert2bert; calling each on
        # its dummy inputs builds the weights (including the cross-attention weights).
        encoder = TFBertModel(config.encoder)
        encoder(encoder.dummy_inputs)
        decoder = TFBertLMHeadModel(config.decoder)
        decoder(decoder.dummy_inputs)

        encoder_decoder_orig = TFEncoderDecoderModel(encoder=encoder,
                                                     decoder=decoder)

        input_ids = ids_tensor([13, 5], encoder.config.vocab_size)
        decoder_input_ids = ids_tensor([13, 1], decoder.config.vocab_size)

        logits_orig = encoder_decoder_orig(
            input_ids=input_ids, decoder_input_ids=decoder_input_ids).logits

        with tempfile.TemporaryDirectory() as tmp_dirname:
            encoder_path = os.path.join(tmp_dirname, "encoder")
            decoder_path = os.path.join(tmp_dirname, "decoder")

            encoder.save_pretrained(encoder_path)
            decoder.save_pretrained(decoder_path)

            encoder_decoder = TFEncoderDecoderModel.from_encoder_decoder_pretrained(
                encoder_path, decoder_path)

        logits_1 = encoder_decoder(input_ids=input_ids,
                                   decoder_input_ids=decoder_input_ids).logits

        self.assertTrue(
            abs(logits_orig.numpy().sum() - logits_1.numpy().sum()) < 1e-3)

        max_diff = np.max(np.abs(logits_1.numpy() - logits_orig.numpy()))
        self.assertAlmostEqual(max_diff, 0.0, places=4)

        with tempfile.TemporaryDirectory() as tmp_dirname:
            encoder_decoder.save_pretrained(tmp_dirname)
            encoder_decoder = TFEncoderDecoderModel.from_pretrained(
                tmp_dirname)

        logits_2 = encoder_decoder(input_ids=input_ids,
                                   decoder_input_ids=decoder_input_ids).logits

        max_diff = np.max(np.abs(logits_2.numpy() - logits_orig.numpy()))
        self.assertAlmostEqual(max_diff, 0.0, places=4)
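
For reference, a sketch of the imports and the ids_tensor helper the test above relies on; ids_tensor lives in the transformers TF test utilities, so the reimplementation below is an assumption about its behavior, not the library's exact code.

import os
import tempfile

import numpy as np
import tensorflow as tf

from transformers import TFBertLMHeadModel, TFBertModel, TFEncoderDecoderModel


def ids_tensor(shape, vocab_size):
    # Assumed behavior: random int32 tensor of token ids in [0, vocab_size).
    return tf.constant(np.random.randint(0, vocab_size, size=shape), dtype=tf.int32)
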
def get_encoder_decoder_models(self):
    encoder_model = TFBertModel.from_pretrained("bert-base-cased",
                                                name="encoder")
    decoder_model = TFBertLMHeadModel.from_pretrained(
        "bert-base-cased",
        config=self.get_decoder_config(),
        name="decoder")
    return {"encoder": encoder_model, "decoder": decoder_model}
Example #3
def build_model(pretrained_path, config, MAX_LEN, vocab_size):  # a keep_tokens arg was commented out; vocab_size is currently unused
    # Three inputs: token ids, segment ids, and a full 2D (UniLM-style seq2seq) attention mask.
    ids = tf.keras.layers.Input((MAX_LEN,), dtype=tf.int32)
    token_id = tf.keras.layers.Input((MAX_LEN,), dtype=tf.int32)
    att = tf.keras.layers.Input((MAX_LEN, MAX_LEN), dtype=tf.int32)
    config.output_hidden_states = True
    config.is_decoder = True
    config.hierarchical = True  # custom config flag, presumably consumed by a modified BERT
    # Alternative left by the author: load a bare TFBertModel and trim its
    # embedding matrix down to keep_tokens.
    # bert_model = TFBertModel.from_pretrained(pretrained_path, config=config)
    # bert_model.bert.set_input_embeddings(tf.gather(bert_model.bert.embeddings.word_embeddings, keep_tokens))
    bert_model = TFBertLMHeadModel.from_pretrained(pretrained_path, config=config, from_pt=True)
    x = bert_model(ids, token_type_ids=token_id, attention_mask=att)
    out_put = x.logits
    # Alternative left by the author: tie the output projection to the word embeddings.
    # word_embedding = bert_model.bert.embeddings.word_embeddings
    # embedding_trans = tf.transpose(word_embedding)
    # sof_output = tf.matmul(layer_1, embedding_trans)
    out_put = tf.keras.layers.Activation('softmax')(out_put)
    # CrossEntropy is a custom layer defined elsewhere; presumably it attaches the
    # LM loss via add_loss, which is why compile() below receives no loss argument.
    output = CrossEntropy(2)([ids, token_id, out_put])
    model = tf.keras.models.Model(inputs=[ids, token_id, att], outputs=output)
    optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)
    model.compile(optimizer=optimizer)
    model.summary()
    return model
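
A minimal usage sketch, assuming a checkpoint with PyTorch weights (to match from_pt=True above) and that the custom CrossEntropy layer is already in scope; the checkpoint name and MAX_LEN are illustrative only.

from transformers import BertConfig

config = BertConfig.from_pretrained("bert-base-chinese")
model = build_model("bert-base-chinese", config, MAX_LEN=128, vocab_size=config.vocab_size)
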
def get_encoder_decoder_model(self, config, decoder_config):
    encoder_model = TFBertModel(config, name="encoder")
    decoder_model = TFBertLMHeadModel(decoder_config, name="decoder")
    return encoder_model, decoder_model
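
And a sketch of how this pair is usually wired together; config and decoder_config are assumed to come from the test's config helpers.

encoder_model, decoder_model = self.get_encoder_decoder_model(config, decoder_config)
model = TFEncoderDecoderModel(encoder=encoder_model, decoder=decoder_model)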