Example 1
    def test_on_chinese_daily_ner(self):
        import os

        import numpy as np
        from posner.datasets import chinese_daily_ner
        from posner.utils.bert_tokenization import FullTokenizer
        # load_trained_model_from_checkpoint is assumed to be imported at
        # module level in the original test file.

        (x_train, y_train), (x_test, y_test), (vocab, pos_tags) = \
          chinese_daily_ner.load_data(path=None, maxlen=16, onehot=True, min_freq=2)

        current_path = os.path.dirname(os.path.abspath(__file__))
        config_path = os.path.join(current_path, 'test_checkpoint',
                                   'bert_config.json')
        model_path = os.path.join(current_path, 'test_checkpoint',
                                  'bert_model.ckpt')
        model = load_trained_model_from_checkpoint(
            config_path,
            model_path,
            training=False,
            trainable=['Encoder'],
        )
        model.summary(line_length=120)
        model.compile(optimizer='rmsprop',
                      loss='mean_squared_error',
                      metrics=['accuracy'])

        tokenizer = FullTokenizer(
            os.path.join(current_path, 'test_checkpoint', 'vocab.txt'))

        # text = 'all language'
        # x = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))
        x = np.zeros((1, 16))
        x[0, 0] = 1
        y = np.zeros((1, 16, 4))
        y[0, 0, 0] = 1

        # Second input: segment ids, all zeros for a single segment.
        model.fit([x, np.zeros((1, 16))], y, epochs=100)
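
The commented-out lines above hint at how real text would be fed in; a minimal continuation sketch, assuming the tokenizer and model built in this test and the same maxlen of 16 (the padding step is illustrative, not part of the original):

# Hypothetical continuation of the test above: tokenize a sentence, pad the
# ids to the fixed sequence length, and predict with all-zero segment ids.
tokens = tokenizer.tokenize('all language')
ids = tokenizer.convert_tokens_to_ids(tokens)
ids = (ids + [0] * 16)[:16]              # pad/truncate to maxlen=16
token_input = np.asarray([ids])
segment_input = np.zeros((1, 16))
pred = model.predict([token_input, segment_input])  # per-token outputs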
Example 2
def load_trained_model_from_checkpoint(
        config_file,
        checkpoint_file,
        crf_dims,
        training=False,
        trainable=None,
        output_layer_num=1,
        seq_len=int(1e9),
        **kwargs):
  # Derive the number of transformer layers from the BERT config so the
  # per-layer adapter/norm names below can be generated.
  import json
  with open(config_file) as f:
    layer_num = json.load(f)['num_hidden_layers']

  # Only the adapter and layer-normalization weights are left trainable;
  # the pretrained attention and feed-forward weights stay frozen.
  adapter_trainable = (
      ['Encoder-{}-MultiHeadSelfAttention-Adapter'.format(i + 1)
       for i in range(layer_num)] +
      ['Encoder-{}-FeedForward-Adapter'.format(i + 1)
       for i in range(layer_num)] +
      ['Encoder-{}-MultiHeadSelfAttention-Norm'.format(i + 1)
       for i in range(layer_num)] +
      ['Encoder-{}-FeedForward-Norm'.format(i + 1)
       for i in range(layer_num)])

  model = bert.load_trained_model_from_checkpoint(
    config_file,
    checkpoint_file,
    training=training,  # keep the MLM/NSP pretraining heads when True
    use_adapter=True,
    trainable=adapter_trainable,
    seq_len=seq_len,  # forward the sequence-length cap to the underlying loader
  )

  # Attach a CRF tagging head to an intermediate encoder output;
  # layers[-9] sits ahead of the MLM/NSP branches.
  crf = CRF(crf_dims, name='CRF')
  inp = model.input
  out = crf(model.layers[-9].output)
  model = keras.models.Model(inp, out)
  model.summary(line_length=150)
  return model
Example 3
 def test_load_training(self):
     current_path = os.path.dirname(os.path.abspath(__file__))
     config_path = os.path.join(current_path, 'test_checkpoint',
                                'bert_config.json')
     model_path = os.path.join(current_path, 'test_checkpoint',
                               'bert_model.ckpt')
     model = load_trained_model_from_checkpoint(config_path,
                                                model_path,
                                                training=True)
     model.summary()
Example 4
 def test_load_with_trainable_prefixes(self):
     current_path = os.path.dirname(os.path.abspath(__file__))
     config_path = os.path.join(current_path, 'test_checkpoint',
                                'bert_config.json')
     model_path = os.path.join(current_path, 'test_checkpoint',
                               'bert_model.ckpt')
     model = load_trained_model_from_checkpoint(
         config_path,
         model_path,
         training=False,
         trainable=['Encoder'],
     )
     model.summary()
Example 5
 def test_load_adapter(self):
     current_path = os.path.dirname(os.path.abspath(__file__))
     config_path = os.path.join(current_path, 'test_checkpoint',
                                'bert_config.json')
     model_path = os.path.join(current_path, 'test_checkpoint',
                               'bert_model.ckpt')
     model = load_trained_model_from_checkpoint(
         config_path,
         model_path,
         training=False,
         use_adapter=True,
         trainable=[
             'Encoder-{}-MultiHeadSelfAttention-Adapter'.format(i + 1)
             for i in range(2)
         ] +
         ['Encoder-{}-FeedForward-Adapter'.format(i + 1)
          for i in range(2)] + [
              'Encoder-{}-MultiHeadSelfAttention-Norm'.format(i + 1)
              for i in range(2)
          ] +
         ['Encoder-{}-FeedForward-Norm'.format(i + 1) for i in range(2)],
     )
     model.summary()
Example 6
 def test_load_output_layer_num(self):
     current_path = os.path.dirname(os.path.abspath(__file__))
     config_path = os.path.join(current_path, 'test_checkpoint',
                                'bert_config.json')
     model_path = os.path.join(current_path, 'test_checkpoint',
                               'bert_model.ckpt')
     model = load_trained_model_from_checkpoint(config_path,
                                                model_path,
                                                training=False,
                                                output_layer_num=4)
     model.summary()
     model = load_trained_model_from_checkpoint(config_path,
                                                model_path,
                                                training=False,
                                                output_layer_num=[0])
     model.summary()
     model = load_trained_model_from_checkpoint(config_path,
                                                model_path,
                                                training=False,
                                                output_layer_num=[1])
     model.summary()
     model = load_trained_model_from_checkpoint(config_path,
                                                model_path,
                                                training=False,
                                                output_layer_num=[-1])
     model.summary()
     model = load_trained_model_from_checkpoint(config_path,
                                                model_path,
                                                training=False,
                                                output_layer_num=[-2])
     model.summary()
     model = load_trained_model_from_checkpoint(config_path,
                                                model_path,
                                                training=False,
                                                output_layer_num=[0, -1])
     model.summary()
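
A quick check of what output_layer_num changes, assuming this loader follows the keras-bert convention of concatenating the selected encoder outputs along the feature axis (config_path and model_path as set up in the test above):

# Hypothetical check: with feature-wise concatenation, the four-layer model's
# last output dimension should be four times the single-layer model's.
model_1 = load_trained_model_from_checkpoint(config_path, model_path,
                                             training=False,
                                             output_layer_num=1)
model_4 = load_trained_model_from_checkpoint(config_path, model_path,
                                             training=False,
                                             output_layer_num=4)
print(model_1.output_shape, model_4.output_shape)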
Example 7
def train_ner():
    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    ## TODO: update for other datasets.
    # processor = NerProcessor()
    # train_examples = processor.get_train_examples(FLAGS.data_dir)
    # label_list = processor.get_labels()
    # output_dims = len(label_list)

    (x_train, y_train), (x_test, y_test), (vocab, pos_tags) = \
      chinese_daily_ner.load_data(path=None, maxlen=FLAGS.max_seq_length, onehot=True, min_freq=2)
    output_dims = len(pos_tags)

    num_train_steps = int(
        len(x_train) * FLAGS.num_train_epochs / FLAGS.train_batch_size)

    if FLAGS.crf:
        model = bert_crf.load_trained_model_from_checkpoint(
            config_file=FLAGS.bert_config_file,
            checkpoint_file=FLAGS.init_checkpoint,
            crf_dims=output_dims,
            training=True,
            seq_len=FLAGS.max_seq_length,
        )

    else:
        model = bert.load_trained_model_from_checkpoint(
            config_file=FLAGS.bert_config_file,
            checkpoint_file=FLAGS.init_checkpoint,
            training=True,
            seq_len=FLAGS.max_seq_length,
        )

        bottle = tf.keras.layers.Dense(output_dims,
                                       activation='softmax',
                                       name='NER-output')
        inp = model.input
        out = bottle(model.layers[-9].output)  # exclude the MLM and NSP heads
        model = tf.keras.models.Model(inp, out)

        model.summary(line_length=150)

    logging.info("***** Running training *****")
    logging.info("  Num examples = %d", len(x_train))
    logging.info("  Batch size = %d", FLAGS.train_batch_size)
    logging.info("  Num steps = %d", num_train_steps)

    warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    optimizer = AdamWarmup(decay_steps=FLAGS.decay_steps,
                           warmup_steps=warmup_steps)

    if FLAGS.use_focal_loss:
        #TODO: test CategoricalFocalLoss
        from posner.losses.focal_loss import CategoricalFocalLoss
        focal_loss = CategoricalFocalLoss()
        model.compile(optimizer=optimizer,
                      loss=focal_loss,
                      metrics=[precision, recall, f1])
    else:
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=[precision, recall, f1])

    # Besides token ids, the model built with training=True expects two more
    # inputs, filled here with zeros and ones.
    model.fit([x_train, np.zeros_like(x_train),
               np.ones_like(x_train)],
              y_train,
              batch_size=FLAGS.train_batch_size,
              epochs=FLAGS.num_train_epochs)
    return model
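
The original script stops after fitting; a minimal sketch of an evaluation step that could sit just before the return, packing x_test the same way as the training inputs:

    # Hypothetical evaluation step inside train_ner (before `return model`).
    results = model.evaluate([x_test, np.zeros_like(x_test),
                              np.ones_like(x_test)],
                             y_test,
                             batch_size=FLAGS.train_batch_size)
    for name, value in zip(model.metrics_names, results):
        logging.info("  %s = %s", name, value)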