# Assumed context for these snippets: `import os`, `import tensorflow as tf`,
# a `Bert` class exposing `from_pretrained` (this repo's implementation), and
# CHINESE_BERT_PATH / GOOGLE_BERT_PATH / BASE_DIR pointing at the extracted
# checkpoint directories.
def test_load_pretrained_bert(self):
    model_paths = [
        # 'chinese_wwm_ext_L-12_H-768_A-12',
        # 'chinese_L-12_H-768_A-12',
        'chinese_roberta_wwm_ext_L-12_H-768_A-12',
        # 'chinese_roberta_wwm_large_ext_L-24_H-1024_A-16'
    ]
    for p in model_paths:
        model = Bert.from_pretrained(
            os.path.join(CHINESE_BERT_PATH, p),
            verbose=False,
            check_weights=True)
        model.summary()
        self._do_predict(model)
        for mw, bw in zip(model.trainable_weights, model.bert_model.trainable_weights):
            print('{} -> {}'.format(mw.name, bw.name))

    # load again, skipping the token embedding and pooler weights
    model = Bert.from_pretrained(
        os.path.join(CHINESE_BERT_PATH, model_paths[0]),
        skip_token_embedding=True,
        skip_pooler=True,
        verbose=False)
    model.summary()
    self._do_predict(model)
def _build_bert_model(trainable=True):
    input_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name='input_ids')
    segment_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name='segment_ids')
    attention_mask = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name='attention_mask')
    bert = Bert.from_pretrained(
        os.path.join(CHINESE_BERT_PATH, 'chinese_roberta_wwm_ext_L-12_H-768_A-12'))
    bert.trainable = trainable
    _, pooled_output = bert(inputs=[input_ids, segment_ids, attention_mask])
    outputs = tf.keras.layers.Dense(2, name='output')(pooled_output)
    model = tf.keras.Model(
        inputs=[input_ids, segment_ids, attention_mask],
        outputs=outputs)
    # 'binary_cross_entropy' is not a valid Keras loss identifier; the correct
    # string is 'binary_crossentropy'. Note that the Dense head emits raw
    # logits, so a sigmoid activation (or a from_logits=True loss) may be needed.
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model
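# A minimal usage sketch for _build_bert_model above, not part of the original
# tests: the helper name, the random token ids, and the label shapes are
# assumptions made purely to illustrate the model's input/output contract.
def _demo_train_bert_classifier():
    import numpy as np
    model = _build_bert_model(trainable=False)  # freeze BERT, train only the head
    batch_size, seq_len = 2, 16
    input_ids = np.random.randint(1, 100, size=(batch_size, seq_len), dtype=np.int32)
    segment_ids = np.zeros((batch_size, seq_len), dtype=np.int32)
    attention_mask = np.ones((batch_size, seq_len), dtype=np.int32)
    labels = np.random.randint(0, 2, size=(batch_size, 2)).astype(np.float32)  # matches Dense(2)
    model.fit([input_ids, segment_ids, attention_mask], labels, epochs=1)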
def _build_model(self):
    model = Bert.from_pretrained(
        os.path.join(BASE_DIR, 'bert_uncased_L-6_H-768_A-12'),
        return_states=True,
        verbose=False)
    return model
def _build_model(self):
    model = Bert.from_pretrained(
        os.path.join(GOOGLE_BERT_PATH, 'uncased_L-6_H-768_A-12'),
        return_states=True,
        verbose=False)
    return model
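# A hedged prediction sketch for the _build_model variants above, not part of
# the original tests. It assumes the model accepts [input_ids, segment_ids,
# attention_mask] like the classifier builder earlier, and that with
# return_states=True the per-layer hidden states are returned alongside the
# usual sequence and pooled outputs; the toy ids below are placeholders.
def _demo_predict_with_states(model):
    import numpy as np
    input_ids = np.array([[101, 2023, 2003, 1037, 3231, 102]], dtype=np.int32)
    segment_ids = np.zeros_like(input_ids)
    attention_mask = np.ones_like(input_ids)
    outputs = model.predict([input_ids, segment_ids, attention_mask])
    for i, out in enumerate(outputs):
        print('output[{}] shape: {}'.format(i, getattr(out, 'shape', None)))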