Example 1
import paddle
import paddlehub as hub

if __name__ == '__main__':
    # BIO tags for person, organization and location entities.
    label_list = ["B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "O"]
    label_map = {idx: label for idx, label in enumerate(label_list)}
    model = hub.Module(
        name='ernie_tiny',
        version='2.0.1',
        task='token-cls',
        label_map=label_map,
    )

    # Tokenize MSRA_NER with the model's own tokenizer.
    train_dataset = hub.datasets.MSRA_NER(tokenizer=model.get_tokenizer(),
                                          max_seq_len=128,
                                          mode='train')

    dev_dataset = hub.datasets.MSRA_NER(tokenizer=model.get_tokenizer(),
                                        max_seq_len=128,  # keep consistent with the training set
                                        mode='dev')

    optimizer = paddle.optimizer.AdamW(learning_rate=5e-5,
                                       parameters=model.parameters())
    trainer = hub.Trainer(model,
                          optimizer,
                          checkpoint_dir='token_cls_save_dir',
                          use_gpu=True)

    trainer.train(train_dataset,
                  epochs=3,
                  batch_size=32,
                  eval_dataset=dev_dataset,
                  save_interval=1)
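
The fine-tuned weights can then be reloaded for inference. A minimal sketch, assuming the Trainer saved its best weights under token_cls_save_dir/best_model and that predict accepts single-sentence lists (checkpoint path, input format and the sample sentence are assumptions, not part of the script above):

# Hypothetical inference step; path and sample text are placeholders.
model = hub.Module(
    name='ernie_tiny',
    version='2.0.1',
    task='token-cls',
    label_map=label_map,
    load_checkpoint='./token_cls_save_dir/best_model/model.pdparams',
)
results = model.predict(
    data=[['今天我们一起去上海']],  # one sentence per inner list
    max_seq_len=128,
    batch_size=1,
    use_gpu=True,
)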
Example 2
import argparse

import paddle
import paddlehub as hub
from paddlehub.datasets import LCQMC

# Hyperparameters come from the command line; the defaults below are
# assumptions that mirror the other examples.
parser = argparse.ArgumentParser()
parser.add_argument('--max_seq_len', type=int, default=128)
parser.add_argument('--learning_rate', type=float, default=5e-5)
parser.add_argument('--num_epoch', type=int, default=3)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--save_interval', type=int, default=1)
parser.add_argument('--checkpoint_dir', type=str, default='text_matching_save_dir')
parser.add_argument('--use_gpu', action='store_true')
args = parser.parse_args()

if __name__ == '__main__':
    model = hub.Module(name='ernie_tiny',
                       version='2.0.1',
                       task='text-matching')
    tokenizer = model.get_tokenizer()

    train_dataset = LCQMC(tokenizer=tokenizer,
                          max_seq_len=args.max_seq_len,
                          mode='train')
    dev_dataset = LCQMC(tokenizer=tokenizer,
                        max_seq_len=args.max_seq_len,
                        mode='dev')
    test_dataset = LCQMC(tokenizer=tokenizer,
                         max_seq_len=args.max_seq_len,
                         mode='test')

    optimizer = paddle.optimizer.AdamW(learning_rate=args.learning_rate,
                                       parameters=model.parameters())
    trainer = hub.Trainer(model,
                          optimizer,
                          checkpoint_dir=args.checkpoint_dir,
                          use_gpu=args.use_gpu)
    trainer.train(
        train_dataset,
        epochs=args.num_epoch,
        batch_size=args.batch_size,
        eval_dataset=dev_dataset,
        save_interval=args.save_interval,
    )
    trainer.evaluate(test_dataset, batch_size=args.batch_size)
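
Once trained, the same Module can score new sentence pairs. A minimal sketch, assuming predict accepts a list of [text_a, text_b] pairs (the example pair is a placeholder):

    # Hypothetical inference step; the sentence pair is a placeholder.
    text_pairs = [['这个表情叫什么', '这个猫的表情叫什么']]
    results = model.predict(text_pairs,
                            max_seq_len=args.max_seq_len,
                            batch_size=1,
                            use_gpu=args.use_gpu)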
Example 3
import paddle
import paddlehub as hub

# Load ERNIE Tiny with a two-class sequence classification head.
model = hub.Module(name='ernie_tiny',
                   version='2.0.1',
                   task='seq-cls',
                   num_classes=2)

train_dataset = hub.datasets.ChnSentiCorp(tokenizer=model.get_tokenizer(),
                                          max_seq_len=128,
                                          mode='train')
dev_dataset = hub.datasets.ChnSentiCorp(tokenizer=model.get_tokenizer(),
                                        max_seq_len=128,
                                        mode='dev')

optimizer = paddle.optimizer.Adam(learning_rate=5e-5,
                                  parameters=model.parameters())
trainer = hub.Trainer(model,
                      optimizer,
                      checkpoint_dir='test_ernie_text_cls',
                      use_gpu=False)

trainer.train(train_dataset,
              epochs=3,
              batch_size=32,
              eval_dataset=dev_dataset,
              save_interval=1)
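
For completeness, a minimal prediction sketch for the sentiment model, assuming the best weights land under test_ernie_text_cls/best_model and that a {0: 'negative', 1: 'positive'} label_map matches ChnSentiCorp's two classes (both are assumptions):

# Hypothetical inference step; checkpoint path and sample text are placeholders.
label_map = {0: 'negative', 1: 'positive'}
model = hub.Module(name='ernie_tiny',
                   version='2.0.1',
                   task='seq-cls',
                   load_checkpoint='./test_ernie_text_cls/best_model/model.pdparams',
                   label_map=label_map)
results = model.predict([['这个宾馆比较陈旧了，特价的房间也很一般']],
                        max_seq_len=128,
                        batch_size=1,
                        use_gpu=False)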