Example #1
def build_data(args):
    """
    build train and dev data
    """
    task_name = args.task_name.lower()
    processor = reader.MatchProcessor(data_dir=args.data_dir,
                                      task_name=task_name,
                                      vocab_path=args.vocab_path,
                                      max_seq_len=args.max_seq_len,
                                      do_lower_case=args.do_lower_case)

    train_data_generator = processor.data_generator(batch_size=args.batch_size,
                                                    phase='train',
                                                    epoch=args.epoch,
                                                    shuffle=True,
                                                    device=args.gpu)
    num_train_examples = processor.get_num_examples(phase='train')

    dev_data_generator = processor.data_generator(batch_size=args.batch_size,
                                                  phase='dev',
                                                  epoch=1,
                                                  shuffle=False,
                                                  device=args.gpu)
    num_dev_examples = processor.get_num_examples(phase='dev')

    max_train_steps = args.epoch * num_train_examples // args.batch_size
    warmup_steps = int(max_train_steps * args.warmup_proportion)

    train_data = [train_data_generator, num_train_examples]
    dev_data = [dev_data_generator, num_dev_examples]

    return processor, [train_data, dev_data], warmup_steps
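
A minimal sketch of how a caller might unpack and iterate the returned data (hypothetical usage; it assumes the fluid.DataFeeder/executor setup shown in the later examples):

processor, [train_data, dev_data], warmup_steps = build_data(args)
train_data_generator, num_train_examples = train_data

for batch_id, data in enumerate(train_data_generator()):
    # each `data` batch is a list of examples that can be passed to
    # fluid.DataFeeder.feed(), as in the training example below
    pass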

Example #2

def main(args):
    """run prediction on the test set and optionally export an inference model"""
    task_name = args.task_name.lower()
    processor = reader.MatchProcessor(data_dir=args.data_dir,
                                      task_name=task_name,
                                      vocab_path=args.vocab_path,
                                      max_seq_len=args.max_seq_len,
                                      do_lower_case=args.do_lower_case)

    num_labels = len(processor.get_labels())
    infer_data_generator = processor.data_generator(batch_size=args.batch_size,
                                                    phase='test',
                                                    epoch=1,
                                                    shuffle=False)
    num_test_examples = processor.get_num_examples(phase='test')
    main_program = fluid.default_main_program()

    feed_order, loss, probs, accuracy, num_seqs = create_model(
        args, num_labels=num_labels, is_prediction=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, main_program)

    feed_list = [
        main_program.global_block().var(var_name) for var_name in feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    with open(args.output, 'w') as out_scores:
        for batch_id, data in enumerate(infer_data_generator()):
            results = exe.run(fetch_list=[probs],
                              feed=feeder.feed(data),
                              return_numpy=True)
            for elem in results[0]:
                # write the probability of class index 1 for each example
                out_scores.write(str(elem[1]) + '\n')

    if args.save_inference_model_path:
        model_path = args.save_inference_model_path
        fluid.io.save_inference_model(model_path,
                                      feed_order,
                                      probs,
                                      exe,
                                      main_program=main_program)
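
If save_inference_model_path was set, the exported model can later be reloaded for prediction without rebuilding the network. A minimal sketch, assuming the Fluid 1.x fluid.io.load_inference_model API and a `data` batch shaped like the ones produced by infer_data_generator above:

exe = fluid.Executor(fluid.CPUPlace())
# load_inference_model returns the pruned program plus the feed names and
# fetch targets that were passed to save_inference_model above
inference_program, feed_target_names, fetch_targets = \
    fluid.io.load_inference_model(dirname=model_path, executor=exe)
results = exe.run(inference_program,
                  feed=feeder.feed(data),  # reusing the feed layout from above
                  fetch_list=fetch_targets,
                  return_numpy=True)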

Example #3
def build_data(args):
    """
    build test data
    """
    task_name = args.task_name.lower()
    processor = reader.MatchProcessor(data_dir=args.data_dir,
                                      task_name=task_name,
                                      vocab_path=args.vocab_path,
                                      max_seq_len=args.max_seq_len,
                                      do_lower_case=args.do_lower_case)

    test_data_generator = processor.data_generator(batch_size=args.batch_size,
                                                   phase='test',
                                                   epoch=1,
                                                   shuffle=False,
                                                   device=args.gpu)
    num_test_examples = processor.get_num_examples(phase='test')

    test_data = [test_data_generator, num_test_examples]

    return processor, test_data

Example #4
def main(args):
    """main"""
    task_name = args.task_name.lower()
    processor = reader.MatchProcessor(data_dir=args.data_dir,
                                      task_name=task_name,
                                      vocab_path=args.vocab_path,
                                      max_seq_len=args.max_seq_len,
                                      do_lower_case=args.do_lower_case)

    with open(args.vocab_path, 'r') as vocab_file:
        args.voc_size = len(vocab_file.readlines())
    num_labels = len(processor.get_labels())
    train_data_generator = processor.data_generator(batch_size=args.batch_size,
                                                    phase='train',
                                                    epoch=args.epoch,
                                                    shuffle=True)
    num_train_examples = processor.get_num_examples(phase='train')
    dev_data_generator = processor.data_generator(batch_size=args.batch_size,
                                                  phase='dev',
                                                  epoch=1,
                                                  shuffle=False)
    num_dev_examples = processor.get_num_examples(phase='dev')

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    max_train_steps = args.epoch * num_train_examples // args.batch_size
    warmup_steps = int(max_train_steps * args.warmup_proportion)

    train_program = fluid.Program()
    train_startup = fluid.Program()
    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            feed_order, loss, predict, accuracy, num_seqs = create_model(
                args, num_labels, is_prediction=False)
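            # noam_decay implements the Transformer warmup schedule:
            # d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5),
            # with d_model hard-coded to 256 here; the decayed value is then
            # scaled by args.learning_rate below.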
            lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
                256, warmup_steps)
            with fluid.default_main_program()._lr_schedule_guard():
                learning_rate = lr_decay * args.learning_rate
            optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
            optimizer.minimize(loss)

    test_program = fluid.Program()
    test_startup = fluid.Program()
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            feed_order, loss, predict, accuracy, num_seqs = create_model(
                args, num_labels, is_prediction=True)
    test_program = test_program.clone(for_test=True)

    exe = fluid.Executor(place)
    exe.run(train_startup)
    exe.run(test_startup)

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = dev_count

    train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                       loss_name=loss.name,
                                       exec_strategy=exec_strategy,
                                       main_program=train_program)

    test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                      main_program=test_program,
                                      share_vars_from=train_exe)

    feed_list = [
        train_program.global_block().var(var_name) for var_name in feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    time_begin = time.time()
    total_cost, total_acc, total_num_seqs = [], [], []
    for batch_id, data in enumerate(train_data_generator()):
        fetch_outs = train_exe.run(
            feed=feeder.feed(data),
            fetch_list=[loss.name, accuracy.name, num_seqs.name])
        avg_loss = fetch_outs[0]
        avg_acc = fetch_outs[1]
        cur_num_seqs = fetch_outs[2]
        total_cost.extend(avg_loss * cur_num_seqs)
        total_acc.extend(avg_acc * cur_num_seqs)
        total_num_seqs.extend(cur_num_seqs)
        if batch_id % args.skip_steps == 0:
            time_end = time.time()
            used_time = time_end - time_begin
            current_example, current_epoch = processor.get_train_progress()
            print("epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                  "ave acc: %f, speed: %f steps/s" %
                  (current_epoch, current_example, num_train_examples,
                   batch_id, np.sum(total_cost) / np.sum(total_num_seqs),
                   np.sum(total_acc) / np.sum(total_num_seqs),
                   args.skip_steps / used_time))
            time_begin = time.time()
            total_cost, total_acc, total_num_seqs = [], [], []

        if batch_id % args.validation_steps == 0:
            total_dev_cost, total_dev_acc, total_dev_num_seqs = [], [], []
            for dev_id, dev_data in enumerate(dev_data_generator()):
                fetch_outs = test_exe.run(
                    feed=feeder.feed(dev_data),
                    fetch_list=[loss.name, accuracy.name, num_seqs.name])
                avg_dev_loss = fetch_outs[0]
                avg_dev_acc = fetch_outs[1]
                cur_dev_num_seqs = fetch_outs[2]
                total_dev_cost.extend(avg_dev_loss * cur_dev_num_seqs)
                total_dev_acc.extend(avg_dev_acc * cur_dev_num_seqs)
                total_dev_num_seqs.extend(cur_dev_num_seqs)
            print("valid eval: ave loss: %f, ave acc: %f" %
                  (np.sum(total_dev_cost) / np.sum(total_dev_num_seqs),
                   np.sum(total_dev_acc) / np.sum(total_dev_num_seqs)))
            total_dev_cost, total_dev_acc, total_dev_num_seqs = [], [], []

        if batch_id % args.save_steps == 0:
            model_path = os.path.join(args.checkpoints, str(batch_id))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            fluid.io.save_persistables(executor=exe,
                                       dirname=model_path,
                                       main_program=train_program)
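
The persistables written every save_steps can be restored into the same train_program before resuming training or running a standalone evaluation. A minimal sketch (the checkpoint step directory name is only an example):

restore_path = os.path.join(args.checkpoints, '1000')  # hypothetical checkpoint dir
fluid.io.load_persistables(executor=exe,
                           dirname=restore_path,
                           main_program=train_program)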