def test_on_chinese_daily_ner(self):
  import numpy as np

  from posner.datasets import chinese_daily_ner
  from posner.utils.bert_tokenization import FullTokenizer

  (x_train, y_train), (x_test, y_test), (vocab, pos_tags) = \
      chinese_daily_ner.load_data(path=None,
                                  maxlen=16,
                                  onehot=True,
                                  min_freq=2)

  current_path = os.path.dirname(os.path.abspath(__file__))
  config_path = os.path.join(current_path, 'test_checkpoint', 'bert_config.json')
  model_path = os.path.join(current_path, 'test_checkpoint', 'bert_model.ckpt')
  model = load_trained_model_from_checkpoint(
      config_path,
      model_path,
      training=False,
      trainable=['Encoder'],
  )
  model.summary(line_length=120)
  model.compile(optimizer='rmsprop',
                loss='mean_squared_error',
                metrics=['accuracy'])

  tokenizer = FullTokenizer(
      os.path.join(current_path, 'test_checkpoint', 'vocab.txt'))
  # text = 'all language'
  # x = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))

  # Minimal smoke test: one dummy token sequence with one-hot tags.
  x = np.zeros((1, 16))
  x[0, 0] = 1
  y = np.zeros((1, 16, 4))
  y[0, 0, 0] = 1
  # Second input is the (all-zero) segment ids.
  model.fit([x, np.zeros((1, 16))], y, epochs=100)
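  # Sketch of feeding real text instead of the zero tensors above; it uses the
  # standard BERT tokenization calls already imported in this test, with the
  # sample text from the commented-out lines (not part of the original test):
  #
  #   tokens = ['[CLS]'] + tokenizer.tokenize('all language') + ['[SEP]']
  #   ids = tokenizer.convert_tokens_to_ids(tokens)
  #   x = np.array([ids + [0] * (16 - len(ids))])  # pad token ids to maxlen=16
  #   model.fit([x, np.zeros_like(x)], y, epochs=100)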
def load_trained_model_from_checkpoint(
    config_file,
    checkpoint_file,
    crf_dims,
    training=False,
    trainable=None,
    output_layer_num=1,
    seq_len=int(1e9),
    **kwargs):
  # Note: the `trainable` and `output_layer_num` arguments are currently
  # unused; adapter fine-tuning hard-codes the trainable layer list below.
  # Read the encoder layer count from the BERT config so the adapter layer
  # names match the checkpoint.
  import json
  with open(config_file) as f:
    layer_num = json.load(f)['num_hidden_layers']

  model = bert.load_trained_model_from_checkpoint(
      config_file,
      checkpoint_file,
      training=training,  # keep the MLM and NSP heads when True
      seq_len=seq_len,
      use_adapter=True,
      # Only the adapter and layer-norm weights are trainable.
      trainable=['Encoder-{}-MultiHeadSelfAttention-Adapter'.format(i + 1)
                 for i in range(layer_num)] +
                ['Encoder-{}-FeedForward-Adapter'.format(i + 1)
                 for i in range(layer_num)] +
                ['Encoder-{}-MultiHeadSelfAttention-Norm'.format(i + 1)
                 for i in range(layer_num)] +
                ['Encoder-{}-FeedForward-Norm'.format(i + 1)
                 for i in range(layer_num)],
  )
  crf = CRF(crf_dims, name='CRF')
  inp = model.input
  # layers[-9] is the last encoder output, skipping the MLM/NSP heads.
  out = crf(model.layers[-9].output)
  model = keras.models.Model(inp, out)
  model.summary(line_length=150)
  return model
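# Usage sketch (not part of the module): the paths below are hypothetical and
# crf_dims is the size of the NER tag set; the returned Keras model takes
# token ids and segment ids as inputs and emits per-token CRF scores.
#
#   model = load_trained_model_from_checkpoint(
#       'checkpoint/bert_config.json',
#       'checkpoint/bert_model.ckpt',
#       crf_dims=4,
#       seq_len=128,
#   )
#   model.summary()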
def test_load_training(self):
  current_path = os.path.dirname(os.path.abspath(__file__))
  config_path = os.path.join(current_path, 'test_checkpoint', 'bert_config.json')
  model_path = os.path.join(current_path, 'test_checkpoint', 'bert_model.ckpt')
  model = load_trained_model_from_checkpoint(config_path,
                                             model_path,
                                             training=True)
  model.summary()
def test_load_with_trainable_prefixes(self):
  current_path = os.path.dirname(os.path.abspath(__file__))
  config_path = os.path.join(current_path, 'test_checkpoint', 'bert_config.json')
  model_path = os.path.join(current_path, 'test_checkpoint', 'bert_model.ckpt')
  model = load_trained_model_from_checkpoint(
      config_path,
      model_path,
      training=False,
      trainable=['Encoder'],
  )
  model.summary()
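  # Note: the entries in `trainable` act as layer-name prefixes (hence the
  # test name), so 'Encoder' marks every Encoder-* layer trainable while the
  # embedding layers stay frozen.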
def test_load_adapter(self):
  current_path = os.path.dirname(os.path.abspath(__file__))
  config_path = os.path.join(current_path, 'test_checkpoint', 'bert_config.json')
  model_path = os.path.join(current_path, 'test_checkpoint', 'bert_model.ckpt')
  model = load_trained_model_from_checkpoint(
      config_path,
      model_path,
      training=False,
      use_adapter=True,
      # The test checkpoint has two encoder layers; train only their adapter
      # and layer-norm weights.
      trainable=['Encoder-{}-MultiHeadSelfAttention-Adapter'.format(i + 1)
                 for i in range(2)] +
                ['Encoder-{}-FeedForward-Adapter'.format(i + 1)
                 for i in range(2)] +
                ['Encoder-{}-MultiHeadSelfAttention-Norm'.format(i + 1)
                 for i in range(2)] +
                ['Encoder-{}-FeedForward-Norm'.format(i + 1)
                 for i in range(2)],
  )
  model.summary()
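  # A rough sanity check (not part of the original test) that only the
  # adapter/norm weights train, assuming the standalone Keras backend:
  #
  #   from keras import backend as K
  #   trainable = sum(K.count_params(w) for w in model.trainable_weights)
  #   total = sum(K.count_params(w) for w in model.weights)
  #   print('trainable fraction: %.3f' % (trainable / total))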
def test_load_output_layer_num(self):
  current_path = os.path.dirname(os.path.abspath(__file__))
  config_path = os.path.join(current_path, 'test_checkpoint', 'bert_config.json')
  model_path = os.path.join(current_path, 'test_checkpoint', 'bert_model.ckpt')
  # Cover both forms: an int (last N layers) and lists of layer indices.
  for output_layer_num in [4, [0], [1], [-1], [-2], [0, -1]]:
    model = load_trained_model_from_checkpoint(
        config_path,
        model_path,
        training=False,
        output_layer_num=output_layer_num)
    model.summary()
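  # Note (inferred from the cases above): an int N selects the last N encoder
  # layers, whose outputs are concatenated along the feature axis, while a
  # list picks individual layers by index (negative indices count from the
  # end). A quick shape check under that assumption:
  #
  #   model = load_trained_model_from_checkpoint(
  #       config_path, model_path, training=False, output_layer_num=4)
  #   print(model.output_shape)  # expect (None, seq_len, 4 * hidden_size)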
def train_ner():
  tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                         do_lower_case=FLAGS.do_lower_case)

  ## TODO: update for other datasets.
  # processor = NerProcessor()
  # train_examples = processor.get_train_examples(FLAGS.data_dir)
  # label_list = processor.get_labels()
  # output_dims = len(label_list)
  (x_train, y_train), (x_test, y_test), (vocab, pos_tags) = \
      chinese_daily_ner.load_data(path=None,
                                  maxlen=FLAGS.max_seq_length,
                                  onehot=True,
                                  min_freq=2)
  output_dims = len(pos_tags)
  num_train_steps = int(
      len(x_train) * FLAGS.num_train_epochs / FLAGS.train_batch_size)

  if FLAGS.crf:
    # The CRF loader already attaches its own CRF output layer.
    model = bert_crf.load_trained_model_from_checkpoint(
        config_file=FLAGS.bert_config_file,
        checkpoint_file=FLAGS.init_checkpoint,
        crf_dims=output_dims,
        training=True,
        seq_len=FLAGS.max_seq_length,
    )
  else:
    model = bert.load_trained_model_from_checkpoint(
        config_file=FLAGS.bert_config_file,
        checkpoint_file=FLAGS.init_checkpoint,
        training=True,
        seq_len=FLAGS.max_seq_length,
    )
    bottle = tf.keras.layers.Dense(output_dims,
                                   activation='softmax',
                                   name='NER-output')
    inp = model.input
    out = bottle(model.layers[-9].output)  # exclude the MLM and NSP heads
    model = tf.keras.models.Model(inp, out)
  model.summary(line_length=150)

  logging.info("***** Running training *****")
  logging.info("  Num examples = %d", len(x_train))
  logging.info("  Batch size = %d", FLAGS.train_batch_size)
  logging.info("  Num steps = %d", num_train_steps)

  warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
  optimizer = AdamWarmup(decay_steps=FLAGS.decay_steps,
                         warmup_steps=warmup_steps)
  if FLAGS.use_focal_loss:
    # TODO: test CategoricalFocalLoss
    from posner.losses.focal_loss import CategoricalFocalLoss
    focal_loss = CategoricalFocalLoss()
    model.compile(optimizer=optimizer,
                  loss=focal_loss,
                  metrics=[precision, recall, f1])
  else:
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=[precision, recall, f1])

  # Inputs: token ids, segment ids (all zeros), and masks (all ones).
  model.fit([x_train, np.zeros_like(x_train), np.ones_like(x_train)],
            y_train,
            epochs=int(FLAGS.num_train_epochs))
  return model
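# Prediction sketch (not part of the training script): id_to_tag is a
# hypothetical mapping from tag indices back to the chinese_daily_ner tag
# strings, e.g. built from the `pos_tags` returned by load_data.
#
#   probs = model.predict([x_test, np.zeros_like(x_test), np.ones_like(x_test)])
#   tag_ids = probs.argmax(axis=-1)  # (batch, seq_len) tag indices
#   tags = [[id_to_tag[i] for i in row] for row in tag_ids]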