def train(train_i, args):
    """Build a multi-label classification fine-tune task for one training run.

    Args:
        train_i: index of this training run; appended to the checkpoint
            directory name and forwarded to ``change_task``.
        args: parsed CLI namespace carrying model name, optimizer
            hyper-parameters, and runtime settings.

    Returns:
        Tuple of (MultiLabelClassifierTask, MultiLabelClassifyReader).
    """
    dataset = MyDataset()
    module = hub.Module(name=args.model)

    # Reader tokenizes the dataset with the module's own vocabulary.
    reader = hub.reader.MultiLabelClassifyReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len)

    strategy = hub.AdamWeightDecayStrategy(
        weight_decay=args.weight_decay,
        warmup_proportion=args.warmup_proportion,
        lr_scheduler=args.lr_scheduler,
        learning_rate=args.learning_rate)

    # NOTE(review): `eval_interval` and `log_interval` are not defined in this
    # function nor read from `args` — presumably module-level globals; confirm.
    config = hub.RunConfig(
        use_cuda=args.use_gpu,
        num_epoch=args.num_epoch,
        checkpoint_dir=args.checkpoint_dir + str(train_i),
        batch_size=args.batch_size,
        eval_interval=eval_interval,
        log_interval=log_interval,
        strategy=strategy)

    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # Use "pooled_output" for classification tasks on an entire sentence.
    pooled_output = outputs["pooled_output"]

    # The order of tensors in feed_list must not be changed.
    feed_list = [
        inputs["input_ids"].name,
        inputs["position_ids"].name,
        inputs["segment_ids"].name,
        inputs["input_mask"].name,
    ]

    cls_task = hub.MultiLabelClassifierTask(
        data_reader=reader,
        feature=pooled_output,
        feed_list=feed_list,
        num_classes=dataset.num_labels,
        config=config)
    # Fix the seed so repeated runs are reproducible.
    cls_task.main_program.random_seed = args.seed
    change_task(cls_task, train_i)
    return cls_task, reader
max_seq_len=args.max_seq_len, use_task_id=args.use_taskid) # Construct transfer learning network # Use "pooled_output" for classification tasks on an entire sentence. pooled_output = outputs["pooled_output"] # Select finetune strategy, setup config and finetune strategy = hub.AdamWeightDecayStrategy(weight_decay=args.weight_decay, learning_rate=args.learning_rate, lr_scheduler="linear_decay") # Setup runing config for PaddleHub Finetune API config = hub.RunConfig(use_cuda=args.use_gpu, num_epoch=args.num_epoch, batch_size=args.batch_size, checkpoint_dir=args.checkpoint_dir, strategy=strategy) # Define a classfication finetune task by PaddleHub's API multi_label_cls_task = hub.MultiLabelClassifierTask( data_reader=reader, feature=pooled_output, feed_list=feed_list, num_classes=dataset.num_labels, config=config) # Finetune and evaluate by PaddleHub's API # will finish training, evaluation, testing, save model automatically multi_label_cls_task.finetune_and_eval()
dataset = hub.dataset.Toxic(
    tokenizer=tokenizer, max_seq_len=args.max_seq_len)

# Construct transfer learning network.
# Use "pooled_output" for classification tasks on an entire sentence.
pooled_output = outputs["pooled_output"]

# Select fine-tune strategy, setup config and fine-tune.
strategy = hub.AdamWeightDecayStrategy(
    warmup_proportion=args.warmup_proportion,
    weight_decay=args.weight_decay,
    learning_rate=args.learning_rate)

# Setup RunConfig for PaddleHub Fine-tune API.
config = hub.RunConfig(
    use_cuda=args.use_gpu,
    num_epoch=args.num_epoch,
    batch_size=args.batch_size,
    checkpoint_dir=args.checkpoint_dir,
    strategy=strategy)

# Define a classfication fine-tune task by PaddleHub's API; the dataset
# is passed directly, so no separate reader/feed_list is needed here.
multi_label_cls_task = hub.MultiLabelClassifierTask(
    dataset=dataset,
    feature=pooled_output,
    num_classes=dataset.num_labels,
    config=config)

# Fine-tune and evaluate by PaddleHub's API: training, evaluation,
# testing and model saving all happen inside this single call.
multi_label_cls_task.finetune_and_eval()