def test_load_pretrained_bert(self):
    model_paths = [
        # 'chinese_wwm_ext_L-12_H-768_A-12',
        # 'chinese_L-12_H-768_A-12',
        'chinese_roberta_wwm_ext_L-12_H-768_A-12',
        # 'chinese_roberta_wwm_large_ext_L-24_H-1024_A-16'
    ]
    for p in model_paths:
        model = Bert.from_pretrained(
            os.path.join(CHINESE_BERT_PATH, p), verbose=False, check_weights=True)
        model.summary()
        self._do_predict(model)
        for mw, bw in zip(model.trainable_weights, model.bert_model.trainable_weights):
            print('{} -> {}'.format(mw.name, bw.name))

    # skip weights
    model = Bert.from_pretrained(
        os.path.join(CHINESE_BERT_PATH, model_paths[0]),
        skip_token_embedding=True,
        skip_pooler=True,
        verbose=False)
    model.summary()
    self._do_predict(model)
def test_export_saved_model(self):
    model = Bert(
        vocab_size=21128,
        num_layers=12,
        num_attention_heads=8,
        return_states=True,
        return_attention_weights=True,
    )
    input_ids, segment_ids, input_mask = model.dummy_inputs()
    model(inputs=[input_ids, segment_ids, input_mask])
    model.summary()
    model.save("models/export/2", include_optimizer=False)
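# Not part of the original suite: a minimal sketch of reloading the SavedModel written by
# test_export_saved_model above. The path "models/export/2" comes from that test; loading it
# back with tf.keras.models.load_model (compile=False, since no optimizer was exported) is an
# illustrative assumption rather than something the original tests do.
def test_reload_exported_saved_model(self):
    reloaded = tf.keras.models.load_model("models/export/2", compile=False)
    # The revived model can be inspected here, or called again with inputs shaped like
    # the ones it was traced with (model.dummy_inputs() in test_export_saved_model).
    reloaded.summary()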
def _check_bert_outputs(self, return_states=False, return_attention_weights=False):
    NUM_LAYERS = 4
    model = Bert(
        vocab_size=100,
        num_layers=NUM_LAYERS,
        return_states=return_states,
        return_attention_weights=return_attention_weights,
    )
    input_ids, segment_ids, attn_mask = self._build_bert_inputs()
    outputs = model(inputs=[input_ids, segment_ids, attn_mask])
    sequence_outputs, pooled_outputs = outputs[0], outputs[1]
    self.assertAllEqual([2, 16, 768], sequence_outputs.shape)
    self.assertAllEqual([2, 768], pooled_outputs.shape)

    all_states, all_attn_weights = None, None
    if return_states and return_attention_weights:
        self.assertEqual(4, len(outputs))
        all_states, all_attn_weights = outputs[2], outputs[3]
    elif return_states and not return_attention_weights:
        self.assertEqual(3, len(outputs))
        all_states = outputs[2]
    elif not return_states and return_attention_weights:
        self.assertEqual(3, len(outputs))
        all_attn_weights = outputs[2]
    else:
        self.assertEqual(2, len(outputs))

    if all_states is not None:
        # self.assertEqual(2, len(all_states))
        # for state in all_states:
        #     self.assertAllEqual([2, 16, 768], state.shape)
        self.assertAllEqual([2, NUM_LAYERS, 16, 768], all_states.shape)
    if all_attn_weights is not None:
        # self.assertEqual(2, len(all_attn_weights))
        # for attention in all_attn_weights:
        #     self.assertAllEqual([2, 8, 16, 16], attention.shape)
        self.assertAllEqual([2, NUM_LAYERS, 8, 16, 16], all_attn_weights.shape)
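# Not shown in this section: the helper above is presumably driven by a test like the
# following, which exercises all four output configurations. The test name is illustrative;
# it only relies on _check_bert_outputs as defined above.
def test_bert_outputs(self):
    self._check_bert_outputs(return_states=False, return_attention_weights=False)
    self._check_bert_outputs(return_states=True, return_attention_weights=False)
    self._check_bert_outputs(return_states=False, return_attention_weights=True)
    self._check_bert_outputs(return_states=True, return_attention_weights=True)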
def _build_bert_model(trainable=True):
    input_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name='input_ids')
    segment_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name='segment_ids')
    attention_mask = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name='attention_mask')
    bert = Bert.from_pretrained(
        os.path.join(CHINESE_BERT_PATH, 'chinese_roberta_wwm_ext_L-12_H-768_A-12'))
    bert.trainable = trainable
    _, pooled_output = bert(inputs=[input_ids, segment_ids, attention_mask])
    outputs = tf.keras.layers.Dense(2, name='output')(pooled_output)
    model = tf.keras.Model(
        inputs=[input_ids, segment_ids, attention_mask], outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model
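# Not in the original tests: a short sketch of exercising the classifier returned by
# _build_bert_model on random ids. The numpy import, batch size of 2, sequence length of 16
# and the test name are illustrative assumptions only.
def test_bert_classifier_predict(self):
    import numpy as np

    model = _build_bert_model(trainable=False)
    input_ids = np.random.randint(0, 21128, size=(2, 16)).astype(np.int32)
    segment_ids = np.zeros_like(input_ids)
    attention_mask = np.ones_like(input_ids)
    outputs = model.predict([input_ids, segment_ids, attention_mask])
    # one 2-way logit vector per example
    self.assertAllEqual([2, 2], outputs.shape)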
def _build_model(self):
    model = Bert.from_pretrained(
        os.path.join(BASE_DIR, 'bert_uncased_L-6_H-768_A-12'),
        return_states=True,
        verbose=False)
    return model
def test_build_model(self):
    model = Bert(vocab_size=21128)
    input_ids, segment_ids, input_mask = model.dummy_inputs()
    model(inputs=[input_ids, segment_ids, input_mask])
    model.summary()
def test_bert_config(self):
    model = Bert(vocab_size=100, num_layers=2, return_states=True, return_attention_weights=True)
    config = model.get_config()
    print(config)
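# Not in the original tests: a sketch of round-tripping the config printed above, assuming the
# keys returned by get_config() mirror the Bert constructor arguments (a common Keras convention,
# but an assumption here).
def test_bert_config_roundtrip(self):
    model = Bert(vocab_size=100, num_layers=2, return_states=True, return_attention_weights=True)
    config = model.get_config()
    restored = Bert(**config)
    self.assertEqual(config, restored.get_config())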
def _build_model(self):
    model = Bert.from_pretrained(
        os.path.join(GOOGLE_BERT_PATH, "uncased_L-6_H-768_A-12"),
        return_states=True,
        verbose=False)
    return model