コード例 #1
0
def train(train_i, args):
    """Build a multi-label classification fine-tune task for run ``train_i``.

    Args:
        train_i: Index of the current training run; appended to the
            checkpoint directory so each run checkpoints independently.
        args: Parsed command-line namespace (model name, learning-rate
            schedule, batch size, checkpoint dir, GPU flag, seed, ...).

    Returns:
        A ``(task, reader)`` tuple: the configured
        ``hub.MultiLabelClassifierTask`` and its data reader.
    """
    data = MyDataset()
    pretrained = hub.Module(name=args.model)
    data_reader = hub.reader.MultiLabelClassifyReader(
        dataset=data,
        vocab_path=pretrained.get_vocab_path(),
        max_seq_len=args.max_seq_len)

    finetune_strategy = hub.AdamWeightDecayStrategy(
        weight_decay=args.weight_decay,
        warmup_proportion=args.warmup_proportion,
        lr_scheduler=args.lr_scheduler,
        learning_rate=args.learning_rate)

    # NOTE(review): eval_interval / log_interval are free names here —
    # presumably module-level constants; confirm they exist in this module.
    run_config = hub.RunConfig(
        use_cuda=args.use_gpu,
        num_epoch=args.num_epoch,
        checkpoint_dir=args.checkpoint_dir + str(train_i),
        batch_size=args.batch_size,
        eval_interval=eval_interval,
        log_interval=log_interval,
        strategy=finetune_strategy)

    inputs, outputs, _program = pretrained.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # "pooled_output" is the sentence-level feature for whole-sentence
    # classification.
    sentence_feature = outputs["pooled_output"]

    # The Tensor order in feed_list must not be changed.
    feed_list = [
        inputs[name].name
        for name in ("input_ids", "position_ids", "segment_ids", "input_mask")
    ]

    task = hub.MultiLabelClassifierTask(
        data_reader=data_reader,
        feature=sentence_feature,
        feed_list=feed_list,
        num_classes=data.num_labels,
        config=run_config)
    # Fix the program's random seed so repeated runs are reproducible.
    task.main_program.random_seed = args.seed
    change_task(task, train_i)
    return task, data_reader
コード例 #2
0
        # NOTE(review): this excerpt is truncated — these keyword arguments
        # belong to a call that opens above this view, and `reader`,
        # `outputs`, `feed_list`, `dataset`, and `args` are defined there.
        max_seq_len=args.max_seq_len,
        use_task_id=args.use_taskid)

    # Construct transfer learning network
    # Use "pooled_output" for classification tasks on an entire sentence.
    pooled_output = outputs["pooled_output"]

    # Select finetune strategy, setup config and finetune
    # AdamW-style optimizer with a linear learning-rate decay schedule.
    strategy = hub.AdamWeightDecayStrategy(weight_decay=args.weight_decay,
                                           learning_rate=args.learning_rate,
                                           lr_scheduler="linear_decay")

    # Setup runing config for PaddleHub Finetune API
    config = hub.RunConfig(use_cuda=args.use_gpu,
                           num_epoch=args.num_epoch,
                           batch_size=args.batch_size,
                           checkpoint_dir=args.checkpoint_dir,
                           strategy=strategy)

    # Define a classfication finetune task by PaddleHub's API
    # (reader-based API: inputs are wired through feed_list / data_reader).
    multi_label_cls_task = hub.MultiLabelClassifierTask(
        data_reader=reader,
        feature=pooled_output,
        feed_list=feed_list,
        num_classes=dataset.num_labels,
        config=config)

    # Finetune and evaluate by PaddleHub's API
    # will finish training, evaluation, testing, save model automatically
    multi_label_cls_task.finetune_and_eval()
コード例 #3
0
    # NOTE(review): this excerpt is truncated — `tokenizer`, `outputs`, and
    # `args` are defined above this view. Unlike the reader-based variant,
    # this snippet uses the newer API that passes `dataset` directly to the
    # task (no data_reader / feed_list).
    dataset = hub.dataset.Toxic(tokenizer=tokenizer,
                                max_seq_len=args.max_seq_len)

    # Construct transfer learning network
    # Use "pooled_output" for classification tasks on an entire sentence.
    pooled_output = outputs["pooled_output"]

    # Select fine-tune strategy, setup config and fine-tune
    # AdamW-style optimizer with warm-up followed by weight decay.
    strategy = hub.AdamWeightDecayStrategy(
        warmup_proportion=args.warmup_proportion,
        weight_decay=args.weight_decay,
        learning_rate=args.learning_rate)

    # Setup RunConfig for PaddleHub Fine-tune API
    config = hub.RunConfig(use_cuda=args.use_gpu,
                           num_epoch=args.num_epoch,
                           batch_size=args.batch_size,
                           checkpoint_dir=args.checkpoint_dir,
                           strategy=strategy)

    # Define a classfication fine-tune task by PaddleHub's API
    multi_label_cls_task = hub.MultiLabelClassifierTask(
        dataset=dataset,
        feature=pooled_output,
        num_classes=dataset.num_labels,
        config=config)

    # Fine-tune and evaluate by PaddleHub's API
    # will finish training, evaluation, testing, save model automatically
    multi_label_cls_task.finetune_and_eval()