예제 #1
0
    def test_load_pretrained_bert(self):
        """Load pretrained Chinese BERT checkpoints and run predictions.

        Each enabled checkpoint directory under ``CHINESE_BERT_PATH`` is
        loaded through ``Bert.from_pretrained`` with ``check_weights=True``,
        summarized, and passed to ``self._do_predict``. The names of the
        model's trainable weights are then printed side by side with those
        of ``model.bert_model``.

        Afterwards the first checkpoint is loaded once more with
        ``skip_token_embedding=True`` and ``skip_pooler=True`` and goes
        through the same summary/predict steps.
        """
        # Checkpoint directory names relative to CHINESE_BERT_PATH; the
        # commented-out entries are alternatives that are currently disabled.
        checkpoint_names = [
            # 'chinese_wwm_ext_L-12_H-768_A-12',
            # 'chinese_L-12_H-768_A-12',
            'chinese_roberta_wwm_ext_L-12_H-768_A-12',
            # 'chinese_roberta_wwm_large_ext_L-24_H-1024_A-16'
        ]
        for checkpoint_name in checkpoint_names:
            checkpoint_dir = os.path.join(CHINESE_BERT_PATH, checkpoint_name)
            model = Bert.from_pretrained(checkpoint_dir,
                                         verbose=False,
                                         check_weights=True)
            model.summary()
            self._do_predict(model)

            # Print the pairing of wrapper weights and inner bert_model
            # weights so the two orderings can be compared by eye.
            weight_pairs = zip(model.trainable_weights,
                               model.bert_model.trainable_weights)
            for model_weight, bert_weight in weight_pairs:
                print('{} -> {}'.format(model_weight.name, bert_weight.name))

        # Second pass: load the first checkpoint with the skip_* flags set.
        first_checkpoint_dir = os.path.join(CHINESE_BERT_PATH,
                                            checkpoint_names[0])
        partial_model = Bert.from_pretrained(first_checkpoint_dir,
                                             skip_token_embedding=True,
                                             skip_pooler=True,
                                             verbose=False)
        partial_model.summary()
        self._do_predict(partial_model)
예제 #2
0
        def _build_bert_model(trainable=True):
            """Build and compile a two-unit classifier on a pretrained Bert.

            Three variable-length int32 Keras inputs (``input_ids``,
            ``segment_ids``, ``attention_mask``) are fed to a ``Bert`` loaded
            from the ``chinese_roberta_wwm_ext_L-12_H-768_A-12`` directory
            under ``CHINESE_BERT_PATH``. The second value returned by the
            Bert call is passed through a ``Dense(2)`` layer named
            ``output``.

            Args:
                trainable: Value assigned to the Bert layer's ``trainable``
                    attribute before it is called.

            Returns:
                A compiled ``tf.keras.Model`` mapping the three inputs to
                the dense layer's output.
            """
            input_ids = tf.keras.layers.Input(shape=(None, ),
                                              dtype=tf.int32,
                                              name='input_ids')
            segment_ids = tf.keras.layers.Input(shape=(None, ),
                                                dtype=tf.int32,
                                                name='segment_ids')
            attention_mask = tf.keras.layers.Input(shape=(None, ),
                                                   dtype=tf.int32,
                                                   name='attention_mask')

            bert = Bert.from_pretrained(
                os.path.join(CHINESE_BERT_PATH,
                             'chinese_roberta_wwm_ext_L-12_H-768_A-12'))
            bert.trainable = trainable

            # Only the second output of the Bert call is used here.
            _, pooled_output = bert(
                inputs=[input_ids, segment_ids, attention_mask])
            outputs = tf.keras.layers.Dense(2, name='output')(pooled_output)
            model = tf.keras.Model(
                inputs=[input_ids, segment_ids, attention_mask],
                outputs=outputs)
            # 'binary_crossentropy' is the identifier Keras registers for
            # this loss; other spellings fail when the loss is resolved.
            model.compile(loss='binary_crossentropy', optimizer='adam')
            return model
예제 #3
0
 def _build_model(self):
     """Return a Bert loaded from the pretrained directory under BASE_DIR.

     The model is created via ``Bert.from_pretrained`` with
     ``return_states=True`` and ``verbose=False``.
     """
     pretrained_dir = os.path.join(BASE_DIR, 'bert_uncased_L-6_H-768_A-12')
     return Bert.from_pretrained(pretrained_dir,
                                 return_states=True,
                                 verbose=False)
예제 #4
0
 def _build_model(self):
     """Return a Bert loaded from the directory under GOOGLE_BERT_PATH.

     The model is created via ``Bert.from_pretrained`` with
     ``return_states=True`` and ``verbose=False``.
     """
     pretrained_dir = os.path.join(GOOGLE_BERT_PATH, "uncased_L-6_H-768_A-12")
     return Bert.from_pretrained(pretrained_dir,
                                 return_states=True,
                                 verbose=False)