Example #1
    def test_load_pretrained_bert(self):
        model_paths = [
            # 'chinese_wwm_ext_L-12_H-768_A-12',
            # 'chinese_L-12_H-768_A-12',
            'chinese_roberta_wwm_ext_L-12_H-768_A-12',
            # 'chinese_roberta_wwm_large_ext_L-24_H-1024_A-16'
        ]
        for p in model_paths:
            model = Bert.from_pretrained(os.path.join(CHINESE_BERT_PATH, p),
                                         verbose=False,
                                         check_weights=True)
            model.summary()
            self._do_predict(model)

            for mw, bw in zip(model.trainable_weights,
                              model.bert_model.trainable_weights):
                print('{} -> {}'.format(mw.name, bw.name))

        # Load again, this time skipping the token-embedding and pooler weights.
        model = Bert.from_pretrained(os.path.join(CHINESE_BERT_PATH,
                                                  model_paths[0]),
                                     skip_token_embedding=True,
                                     skip_pooler=True,
                                     verbose=False)
        model.summary()
        self._do_predict(model)
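
These test snippets assume a module preamble that the listing omits. A minimal sketch of it follows; the transformers-keras import path and the checkpoint directory are assumptions, not part of the original:

import os
import tensorflow as tf
# Assumed import: the Bert class used throughout these examples matches
# the transformers-keras API (from_pretrained, dummy_inputs, ...).
from transformers_keras import Bert

# Hypothetical location of the downloaded Chinese BERT/RoBERTa checkpoints.
CHINESE_BERT_PATH = os.environ.get('CHINESE_BERT_PATH', '/path/to/chinese_bert')
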
Example #2
    def test_export_saved_model(self):
        model = Bert(vocab_size=21128,
                     num_layers=12,
                     num_attention_heads=8,
                     return_states=True,
                     return_attention_weights=True)
        input_ids, segment_ids, input_mask = model.dummy_inputs()
        model(inputs=[input_ids, segment_ids, input_mask])
        model.summary()
        model.save('models/export/2', include_optimizer=False)
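
model.save(...) above writes a TensorFlow SavedModel to models/export/2. A short sketch of loading it back and running a forward pass; the toy inputs are placeholders, and it assumes the subclassed model reloads cleanly via tf.keras.models.load_model:

reloaded = tf.keras.models.load_model('models/export/2')
# Toy batch: one sequence of two hypothetical token ids.
input_ids = tf.constant([[101, 102]], dtype=tf.int32)
segment_ids = tf.zeros_like(input_ids)
input_mask = tf.ones_like(input_ids)
outputs = reloaded([input_ids, segment_ids, input_mask])
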
Example #3
    def _check_bert_outputs(self, return_states=False, return_attention_weights=False):
        NUM_LAYERS = 4
        model = Bert(
            vocab_size=100,
            num_layers=NUM_LAYERS,
            return_states=return_states,
            return_attention_weights=return_attention_weights,
        )
        input_ids, segment_ids, attn_mask = self._build_bert_inputs()
        outputs = model(inputs=[input_ids, segment_ids, attn_mask])
        sequence_outputs, pooled_outputs = outputs[0], outputs[1]
        self.assertAllEqual([2, 16, 768], sequence_outputs.shape)
        self.assertAllEqual([2, 768], pooled_outputs.shape)

        all_states, all_attn_weights = None, None
        if return_states and return_attention_weights:
            self.assertEqual(4, len(outputs))
            all_states, all_attn_weights = outputs[2], outputs[3]
        elif return_states and not return_attention_weights:
            self.assertEqual(3, len(outputs))
            all_states = outputs[2]
        elif not return_states and return_attention_weights:
            self.assertEqual(3, len(outputs))
            all_attn_weights = outputs[2]
        else:
            self.assertEqual(2, len(outputs))

        if all_states is not None:
            # self.assertEqual(2, len(all_states))
            # for state in all_states:
            #     self.assertAllEqual([2, 16, 768], state.shape)
            self.assertAllEqual([2, NUM_LAYERS, 16, 768], all_states.shape)

        if all_attn_weights is not None:
            # self.assertEqual(2, len(all_attn_weights))
            # for attention in all_attn_weights:
            #     self.assertAllEqual([2, 8, 16, 16], attention.shape)
            self.assertAllEqual([2, NUM_LAYERS, 8, 16, 16], all_attn_weights.shape)
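
The helper above calls self._build_bert_inputs(), which the listing does not show. Its shapes can be read off the assertions (batch size 2, sequence length 16, ids within vocab_size=100); a hypothetical reconstruction, together with a driver that exercises all four flag combinations:

    def _build_bert_inputs(self):
        # Hypothetical helper, inferred from the shape assertions above.
        input_ids = tf.random.uniform((2, 16), minval=0, maxval=100, dtype=tf.int32)
        segment_ids = tf.zeros((2, 16), dtype=tf.int32)
        attn_mask = tf.ones((2, 16), dtype=tf.int32)
        return input_ids, segment_ids, attn_mask

    def test_bert_outputs(self):
        # Hypothetical driver covering every combination of the two flags.
        for return_states in (False, True):
            for return_attention_weights in (False, True):
                self._check_bert_outputs(return_states, return_attention_weights)
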
Example #4
        def _build_bert_model(trainable=True):
            input_ids = tf.keras.layers.Input(shape=(None, ),
                                              dtype=tf.int32,
                                              name='input_ids')
            segment_ids = tf.keras.layers.Input(shape=(None, ),
                                                dtype=tf.int32,
                                                name='segment_ids')
            attention_mask = tf.keras.layers.Input(shape=(None, ),
                                                   dtype=tf.int32,
                                                   name='attention_mask')

            bert = Bert.from_pretrained(
                os.path.join(CHINESE_BERT_PATH,
                             'chinese_roberta_wwm_ext_L-12_H-768_A-12'))
            bert.trainable = trainable

            _, pooled_output = bert(
                inputs=[input_ids, segment_ids, attention_mask])
            outputs = tf.keras.layers.Dense(2, name='output')(pooled_output)
            model = tf.keras.Model(
                inputs=[input_ids, segment_ids, attention_mask],
                outputs=outputs)
            model.compile(loss='binary_crossentropy', optimizer='adam')
            return model
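
A minimal usage sketch for the builder above; the toy batch and labels are fabricated placeholders, not from the original:

model = _build_bert_model(trainable=False)
batch_size, seq_len = 2, 16
toy_inputs = [
    tf.random.uniform((batch_size, seq_len), maxval=21128, dtype=tf.int32),  # input_ids
    tf.zeros((batch_size, seq_len), dtype=tf.int32),                         # segment_ids
    tf.ones((batch_size, seq_len), dtype=tf.int32),                          # attention_mask
]
toy_labels = tf.one_hot([0, 1], depth=2)  # one toy label per example
model.fit(toy_inputs, toy_labels, epochs=1)
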
Example #5
    def _build_model(self):
        model = Bert.from_pretrained(
            os.path.join(BASE_DIR, 'bert_uncased_L-6_H-768_A-12'),
            return_states=True,
            verbose=False)
        return model
    def test_build_model(self):
        model = Bert(vocab_size=21128)
        input_ids, segment_ids, input_mask = model.dummy_inputs()
        model(inputs=[input_ids, segment_ids, input_mask])
        model.summary()

    def test_bert_config(self):
        model = Bert(vocab_size=100,
                     num_layers=2,
                     return_states=True,
                     return_attention_weights=True)
        config = model.get_config()
        print(config)
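
Since get_config() returns the constructor arguments, the printed config can rebuild an equivalent model. A one-line sketch, assuming Bert inherits the default tf.keras.Model.from_config behaviour:

    restored = Bert.from_config(config)  # by default this just calls Bert(**config)
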
Example #8
    def _build_model(self):
        model = Bert.from_pretrained(
            os.path.join(GOOGLE_BERT_PATH, 'uncased_L-6_H-768_A-12'),
            return_states=True,
            verbose=False)
        return model
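
With return_states=True the returned tuple gains a third element, the stacked per-layer hidden states (cf. _check_bert_outputs in Example #3). A hypothetical test using the builder above:

    def test_states_output(self):
        # Hypothetical test, not in the original listing.
        model = self._build_model()
        input_ids, segment_ids, input_mask = model.dummy_inputs()
        sequence_output, pooled_output, all_states = model(
            inputs=[input_ids, segment_ids, input_mask])
        # all_states: [batch_size, num_layers, seq_len, hidden_size]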