Example 1
    dataset = hub.dataset.GLUE("STS-B",
                               tokenizer=tokenizer,
                               max_seq_len=args.max_seq_len)

    # Construct transfer learning network
    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_output" for token-level output.
    pooled_output = outputs["pooled_output"]

    # Select a fine-tune strategy, set up the config, and fine-tune
    strategy = hub.AdamWeightDecayStrategy(
        warmup_proportion=args.warmup_proportion,
        weight_decay=args.weight_decay,
        learning_rate=args.learning_rate)

    # Set up RunConfig for the PaddleHub Fine-tune API
    config = hub.RunConfig(use_data_parallel=args.use_data_parallel,
                           use_cuda=args.use_gpu,
                           num_epoch=args.num_epoch,
                           batch_size=args.batch_size,
                           checkpoint_dir=args.checkpoint_dir,
                           strategy=strategy)

    # Define a regression fine-tune task with PaddleHub's API
    reg_task = hub.RegressionTask(dataset=dataset,
                                  feature=pooled_output,
                                  config=config)

    # Fine-tune and evaluate with PaddleHub's API;
    # it will finish training, evaluation, and testing, and save the model automatically.
    reg_task.finetune_and_eval()
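
The snippet above begins after the pre-trained module, tokenizer, and dataset have been prepared. For context, a minimal sketch of the setup such PaddleHub 1.x fine-tuning scripts typically start with is shown below; the module name and the literal max_seq_len value are illustrative assumptions, not taken from the example itself.

    import paddlehub as hub

    # Load a pre-trained module and build its program (module name is assumed).
    module = hub.Module(name="ernie")
    # context() returns the input variables, output variables, and main program;
    # max_seq_len=128 is an assumed value (the demo reads args.max_seq_len).
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=128)

The tokenizer and dataset construction that precede the snippet follow the same pattern shown at the end of Example 3.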
Example 2
        inputs["segment_ids"].name,
        inputs["input_mask"].name,
    ]

    if args.use_taskid:
        feed_list.append(inputs["task_ids"].name)

    # Select a finetune strategy, set up the config, and finetune
    strategy = hub.AdamWeightDecayStrategy(weight_decay=args.weight_decay,
                                           learning_rate=args.learning_rate,
                                           lr_scheduler="linear_decay")

    # Set up the running config for the PaddleHub Finetune API
    config = hub.RunConfig(use_data_parallel=args.use_data_parallel,
                           use_pyreader=args.use_pyreader,
                           use_cuda=args.use_gpu,
                           num_epoch=args.num_epoch,
                           batch_size=args.batch_size,
                           checkpoint_dir=args.checkpoint_dir,
                           strategy=strategy)

    # Define a regression finetune task with PaddleHub's API
    reg_task = hub.RegressionTask(data_reader=reader,
                                  feature=pooled_output,
                                  feed_list=feed_list,
                                  config=config)

    # Finetune and evaluate with PaddleHub's API;
    # it will finish training, evaluation, and testing, and save the model automatically.
    reg_task.finetune_and_eval()
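
Unlike Examples 1 and 3, this variant feeds data to the task through a data_reader rather than a tokenizer-backed dataset. As a hedged sketch, a reader for the same STS-B regression setup could be constructed as follows with the PaddleHub 1.x reader API; the module name and max_seq_len are assumptions for illustration.

    import paddlehub as hub

    # Hypothetical reader setup; module name and max_seq_len are assumed.
    module = hub.Module(name="ernie")
    dataset = hub.dataset.GLUE("STS-B")
    reader = hub.reader.RegressionReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=128)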
Example 3
    # Construct transfer learning network
    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_output" for token-level output.
    pooled_output = outputs["pooled_output"]

    # Set up RunConfig for the PaddleHub Fine-tune API
    config = hub.RunConfig(use_data_parallel=False,
                           use_cuda=args.use_gpu,
                           batch_size=args.batch_size,
                           checkpoint_dir=args.checkpoint_dir,
                           strategy=hub.AdamWeightDecayStrategy())

    # Define a regression fine-tune task with PaddleHub's API
    reg_task = hub.RegressionTask(
        feature=pooled_output,
        config=config,
    )

    # The STS-B dataset provides the prediction data, and the dataset class has already processed it.
    # To process customized data, see predict.py in the text_classification demo.
    # Use the appropriate tokenizer to preprocess the data.
    # For ernie_tiny, it performs word segmentation to get subwords. More details: https://www.jiqizhixin.com/articles/2019-11-06-9
    if module.name == "ernie_tiny":
        tokenizer = hub.ErnieTinyTokenizer(
            vocab_file=module.get_vocab_path(),
            spm_path=module.get_spm_path(),
            word_dict_path=module.get_word_dict_path())
    else:
        tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path())
    dataset = hub.dataset.GLUE("STS-B",
                               tokenizer=tokenizer,
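
All three snippets read their hyperparameters from an args object populated elsewhere in the script. As a rough sketch, an argparse parser supplying the flags referenced above might look like the following; every default value here is an illustrative assumption.

    import argparse

    # Hypothetical parser; all defaults are illustrative assumptions only.
    parser = argparse.ArgumentParser()
    parser.add_argument("--use_gpu", action="store_true", help="fine-tune on GPU")
    parser.add_argument("--use_data_parallel", action="store_true", help="use multi-card data parallelism")
    parser.add_argument("--use_pyreader", action="store_true", help="feed data via a PyReader (Example 2)")
    parser.add_argument("--use_taskid", action="store_true", help="append task_ids to the feed list (Example 2)")
    parser.add_argument("--num_epoch", type=int, default=3, help="number of fine-tune epochs")
    parser.add_argument("--batch_size", type=int, default=32, help="training batch size")
    parser.add_argument("--max_seq_len", type=int, default=128, help="maximum sequence length")
    parser.add_argument("--learning_rate", type=float, default=5e-5, help="peak learning rate")
    parser.add_argument("--weight_decay", type=float, default=0.01, help="AdamW weight decay")
    parser.add_argument("--warmup_proportion", type=float, default=0.1, help="proportion of steps spent warming up the LR")
    parser.add_argument("--checkpoint_dir", type=str, default="ckpt", help="directory for saved checkpoints")
    args = parser.parse_args()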