if __name__ == '__main__':
    # BIO tagging scheme for the MSRA named-entity-recognition task:
    # PER = person, ORG = organization, LOC = location, O = outside any entity.
    label_list = ["B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "O"]
    label_map = {idx: label for idx, label in enumerate(label_list)}

    # Load ERNIE-Tiny configured as a token-classification (sequence labeling) head.
    model = hub.Module(
        name='ernie_tiny',
        version='2.0.1',
        task='token-cls',
        label_map=label_map,
    )

    # Build the tokenizer once and share it between the splits.
    tokenizer = model.get_tokenizer()
    # FIX: the dev split previously used max_seq_len=50 while train used 128,
    # so evaluation truncated sequences differently from training. Use the
    # same max_seq_len for both splits.
    max_seq_len = 128
    train_dataset = hub.datasets.MSRA_NER(tokenizer=tokenizer, max_seq_len=max_seq_len, mode='train')
    dev_dataset = hub.datasets.MSRA_NER(tokenizer=tokenizer, max_seq_len=max_seq_len, mode='dev')

    # AdamW with a standard BERT-style fine-tuning learning rate.
    optimizer = paddle.optimizer.AdamW(learning_rate=5e-5, parameters=model.parameters())
    trainer = hub.Trainer(model, optimizer, checkpoint_dir='token_cls_save_dir', use_gpu=True)
    # Train for 3 epochs, evaluating on dev and checkpointing every epoch.
    trainer.train(train_dataset, epochs=3, batch_size=32, eval_dataset=dev_dataset, save_interval=1)
if __name__ == '__main__':
    # Fine-tune ERNIE-Tiny as a sentence-pair text-matching model on LCQMC.
    model = hub.Module(name='ernie_tiny', version='2.0.1', task='text-matching')

    # A single tokenizer instance is shared across every dataset split.
    tokenizer = model.get_tokenizer()
    splits = {
        mode: LCQMC(tokenizer=tokenizer, max_seq_len=args.max_seq_len, mode=mode)
        for mode in ('train', 'dev', 'test')
    }

    # Optimizer and trainer are configured entirely from command-line args.
    optimizer = paddle.optimizer.AdamW(learning_rate=args.learning_rate, parameters=model.parameters())
    trainer = hub.Trainer(model, optimizer, checkpoint_dir=args.checkpoint_dir, use_gpu=args.use_gpu)

    # Train with periodic evaluation on dev, then report final test metrics.
    trainer.train(
        splits['train'],
        epochs=args.num_epoch,
        batch_size=args.batch_size,
        eval_dataset=splits['dev'],
        save_interval=args.save_interval,
    )
    trainer.evaluate(splits['test'], batch_size=args.batch_size)
# Imports hoisted to the top of the script; the original placed `import paddle`
# in the middle of the module body, after other statements had already run.
import paddle
import paddlehub as hub

# Guard the training entry point, consistent with the sibling example scripts.
if __name__ == '__main__':
    # ERNIE-Tiny configured as a binary sequence-classification head
    # (sentiment polarity on ChnSentiCorp).
    model = hub.Module(name='ernie_tiny', version='2.0.1', task='seq-cls', num_classes=2)

    # Build the tokenizer once and reuse it for both splits (the original
    # called model.get_tokenizer() twice).
    tokenizer = model.get_tokenizer()
    train_dataset = hub.datasets.ChnSentiCorp(tokenizer=tokenizer, max_seq_len=128, mode='train')
    dev_dataset = hub.datasets.ChnSentiCorp(tokenizer=tokenizer, max_seq_len=128, mode='dev')

    optimizer = paddle.optimizer.Adam(learning_rate=5e-5, parameters=model.parameters())
    trainer = hub.Trainer(model, optimizer, checkpoint_dir='test_ernie_text_cls', use_gpu=False)
    # Train for 3 epochs on CPU, evaluating and checkpointing every epoch.
    trainer.train(train_dataset, epochs=3, batch_size=32, eval_dataset=dev_dataset, save_interval=1)