コード例 #1
0
def extract_weights(args):
    """Load pretrained ERNIE parameters and return them as numpy arrays.

    The ERNIE graph is built on CPU, the parameters stored under
    ``args.init_pretraining_params`` are loaded into it, and every parameter
    listed by ``build_weight_map()`` is copied out of the global scope.

    Args:
        args: Argument namespace. ``max_seq_len``, ``use_fp16``,
            ``num_labels`` and ``loss_scaling`` are overwritten in place;
            ``ernie_config_path`` and ``init_pretraining_params`` are read.

    Returns:
        collections.OrderedDict: Maps each target name from the weight map to
        a float32 numpy array. Arrays whose source name contains ``'w_0'``
        are transposed.
    """
    print('extract weights start'.center(60, '='))

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_prog)

    # Fixed settings used for building the graph; they override whatever the
    # caller passed in.
    for attr_name, attr_value in (('max_seq_len', 512),
                                  ('use_fp16', False),
                                  ('num_labels', 2),
                                  ('loss_scaling', 1.0)):
        setattr(args, attr_name, attr_value)

    print('model config:')
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    with fluid.program_guard(test_prog, startup_prog), \
            fluid.unique_name.guard():
        # Only the side effect of building the graph matters here.
        _, _ = create_model(
            args, pyreader_name='train', ernie_config=ernie_config)

    fluid.io.load_vars(
        exe,
        args.init_pretraining_params,
        main_program=test_prog,
        predicate=if_exist)

    converted = collections.OrderedDict()
    for source_name, target_name in build_weight_map().items():
        tensor = fluid.global_scope().find_var(source_name).get_tensor()
        weights = np.array(tensor, dtype=np.float32)
        if 'w_0' in source_name:
            weights = weights.transpose()
        converted[target_name] = weights
        print('{} -> {} {}'.format(source_name, target_name, weights.shape))

    print('extract weights done!'.center(60, '='))
    return converted
コード例 #2
0
def main(args, init_checkpoint):
    """Build the ERNIE prediction graph, load a checkpoint and export it.

    The exported inference model is written to
    ``<args.save_inference_model_path>/<checkpoint dir name>_inference_model``.

    Args:
        args: Argument namespace. Reads ``ernie_config_path``,
            ``ernie_version``, ``use_cuda`` and ``save_inference_model_path``
            (and is passed through to ``create_model``).
        init_checkpoint: Path of the checkpoint directory to load.

    Raises:
        ValueError: If ``init_checkpoint`` or
            ``args.save_inference_model_path`` is empty.
    """
    # Validate up front so a bad invocation fails before the graph is built
    # and the startup program is run. Explicit raises (rather than `assert`)
    # keep the check alive under `python -O`.
    if not init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set for prediction!")
    if not args.save_inference_model_path:
        raise ValueError(
            "args save_inference_model_path should be set for prediction")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            # The reader returned first is not needed for exporting.
            _, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True,
                ernie_version=args.ernie_version)

    predict_prog = predict_prog.clone(for_test=True)
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    init_pretraining_params(exe, init_checkpoint, predict_prog)

    # Save the model under a directory named after the checkpoint.
    ckpt_dir = os.path.basename(init_checkpoint.rstrip('/'))
    model_path = os.path.join(args.save_inference_model_path,
                              ckpt_dir + '_inference_model')
    print("save inference model to %s" % model_path)
    fluid.io.save_inference_model(model_path,
                                  feed_target_names, [probs],
                                  exe,
                                  main_program=predict_prog)
コード例 #3
0
        raise ValueError("args 'init_checkpoint' should be set if"
                         "only doing validation or testing!")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    startup_prog = fluid.Program()
    test_program = fluid.Program()

    with fluid.program_guard(test_program, startup_prog):
        with fluid.unique_name.guard():
            _, _ = create_model(args,
                                pyreader_name='test_reader',
                                ernie_config=ernie_config)

    exe.run(startup_prog)

    init_pretraining_params(
        exe,
        args.init_checkpoint,
        main_program=test_program,
        #main_program=startup_prog,
        use_fp16=args.use_fp16)

    name2params = {}
    prefix = args.init_checkpoint
    for var in startup_prog.list_vars():
        path = os.path.join(prefix, var.name)
コード例 #4
0
def main(args):
    """Export an ERNIE classifier as an inference model and predict with it.

    The prediction graph is built, parameters are loaded from
    ``args.init_checkpoint``, the inference model is saved under
    ``args.save_inference_model_path``, loaded back, and then run over
    ``args.predict_set``. One line per example is printed to stdout.

    Args:
        args: Argument namespace. Reads ``ernie_config_path``, ``vocab_path``,
            ``label_map_config``, ``max_seq_len``, ``do_lower_case``,
            ``use_cuda``, ``init_checkpoint``, ``save_inference_model_path``,
            ``predict_set`` and ``batch_size``.

    Raises:
        ValueError: If ``args.init_checkpoint`` or
            ``args.save_inference_model_path`` is empty.
    """
    # Fail before any graph is built; explicit raises survive `python -O`,
    # unlike `assert`.
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set for prediction!")
    if not args.save_inference_model_path:
        raise ValueError(
            "args save_inference_model_path should be set for prediction")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(vocab_path=args.vocab_path,
                            label_map_config=args.label_map_config,
                            max_seq_len=args.max_seq_len,
                            do_lower_case=args.do_lower_case,
                            in_tokens=False,
                            is_inference=True)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            # The reader returned first is unused: data is fed explicitly
            # through the feed dict below.
            _, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    init_pretraining_params(exe, args.init_checkpoint, predict_prog)

    ckpt_dir = os.path.basename(args.init_checkpoint.rstrip('/'))
    model_path = os.path.join(args.save_inference_model_path,
                              ckpt_dir + '_inference_model')
    print("save inference model to %s" % model_path)
    fluid.io.save_inference_model(model_path,
                                  feed_target_names, [probs],
                                  exe,
                                  main_program=predict_prog)

    # Reload what was just written so prediction runs against the exported
    # artefact rather than the in-memory graph.
    print("load inference model from %s" % model_path)
    infer_program, feed_names, fetch_targets = fluid.io.load_inference_model(
        model_path, exe)

    # Feed names are taken positionally, in the same order as the fields of
    # each sample produced by the reader.
    src_ids = feed_names[0]
    sent_ids = feed_names[1]
    pos_ids = feed_names[2]
    input_mask = feed_names[3]

    predict_data_generator = reader.data_generator(input_file=args.predict_set,
                                                   batch_size=args.batch_size,
                                                   epoch=1,
                                                   shuffle=False)

    print("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    for sample in predict_data_generator():
        output = exe.run(infer_program,
                         feed={
                             src_ids: sample[0],
                             sent_ids: sample[1],
                             pos_ids: sample[2],
                             input_mask: sample[3]
                         },
                         fetch_list=fetch_targets)
        # output[0] is iterated row by row: one row per example in the batch.
        for single_result in output[0]:
            print("example_index:{}\t{}".format(index, single_result))
            index += 1
コード例 #5
0
def main(args):
    """Export an ERNIE classifier and benchmark it with a Paddle predictor.

    The prediction graph is built, parameters are loaded from
    ``args.init_checkpoint`` and the inference model is saved under
    ``args.save_inference_model_path``. A predictor created from that
    directory is then run over ``args.predict_set``; the per-example outputs
    are printed tab-separated and a throughput summary is logged.

    Args:
        args: Argument namespace. Reads ``ernie_config_path``, ``vocab_path``,
            ``label_map_config``, ``max_seq_len``, ``do_lower_case``,
            ``use_cuda``, ``init_checkpoint``, ``save_inference_model_path``,
            ``predict_set`` and ``batch_size``.

    Raises:
        ValueError: If ``args.init_checkpoint`` or
            ``args.save_inference_model_path`` is empty.
    """
    # Fail before any graph is built; explicit raises survive `python -O`,
    # unlike `assert`.
    if not args.init_checkpoint:
        raise ValueError("args 'init_checkpoint' should be set for prediction!")
    if not args.save_inference_model_path:
        raise ValueError(
            "args save_inference_model_path should be set for prediction")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        is_inference=True)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            # The reader returned first is unused: inputs are handed to the
            # predictor directly.
            _, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    init_pretraining_params(exe, args.init_checkpoint, predict_prog)

    ckpt_dir = os.path.basename(args.init_checkpoint.rstrip('/'))
    model_path = os.path.join(args.save_inference_model_path,
                              ckpt_dir + '_inference_model')
    log.info("save inference model to %s" % model_path)
    fluid.io.save_inference_model(
        model_path,
        feed_target_names, [probs],
        exe,
        main_program=predict_prog)

    # Configure the predictor from the directory that was just written.
    config = AnalysisConfig(model_path)
    if not args.use_cuda:
        log.info("disable gpu")
        config.disable_gpu()
        config.switch_ir_optim(True)
    else:
        log.info("using gpu")
        # NOTE(review): 1024 is presumably the initial GPU memory pool size
        # in MB -- confirm against the AnalysisConfig API.
        config.enable_use_gpu(1024)

    # Create PaddlePredictor
    predictor = create_paddle_predictor(config)

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    log.info("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    total_time = 0
    for sample in predict_data_generator():
        # sample[3] (task ids) is deliberately not fed to the predictor;
        # only these four fields are passed, in this order.
        src_ids = sample[0]
        sent_ids = sample[1]
        pos_ids = sample[2]
        input_mask = sample[4]

        inputs = [
            array2tensor(ndarray)
            for ndarray in [src_ids, sent_ids, pos_ids, input_mask]
        ]
        # Only the predictor call itself is timed.
        begin_time = time.time()
        outputs = predictor.run(inputs)
        end_time = time.time()
        total_time += end_time - begin_time

        # parse outputs
        batch_result = outputs[0].as_ndarray()
        for single_example_probs in batch_result:
            print('\t'.join(map(str, single_example_probs.tolist())))
            index += 1

    # An empty predict set leaves total_time at 0; report 0 qps instead of
    # raising ZeroDivisionError.
    qps = index / total_time if total_time > 0 else 0.0
    log.info("qps:{}\ttotal_time:{}\ttotal_example:{}\tbatch_size:{}".format(
        qps, total_time, index, args.batch_size))
コード例 #6
0
def main(args):
    """Fine-tune and/or evaluate an ERNIE classifier as selected by ``args``.

    Depending on ``args.do_train``, ``args.do_val`` and ``args.do_test`` this
    builds a training program and/or a test program, initialises parameters
    from ``args.init_checkpoint`` or ``args.init_pretraining_params``, runs
    the training loop (periodic logging, checkpointing and evaluation) and
    finishes with a final evaluation on the dev and/or test set.

    Args:
        args: Argument namespace holding the run configuration (paths,
            batch size, schedule settings and the ``do_*`` flags).

    Raises:
        ValueError: If none of ``do_train``/``do_val``/``do_test`` is set, or
            if training is disabled and ``args.init_checkpoint`` is empty.
        FileExistsError: If file logging is enabled and ``args.log_path``
            already exists.
    """
    # Validate the flag combination first so a bad invocation fails before
    # the config, device and reader are set up.
    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")
    if not args.do_train and not args.init_checkpoint:
        raise ValueError("args 'init_checkpoint' should be set if "
                         "only doing validation or testing!")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed)

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        # With in_tokens the batch size is divided by max_seq_len before it
        # is used as a per-step example count.
        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config)
                scheduled_lr = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    loss_scaling=args.loss_scaling)

                # The fetched variables are excluded from memory reuse.
                fluid.memory_optimize(
                    input_program=train_program,
                    skip_opt_set=[
                        graph_vars["loss"].name,
                        graph_vars["probs"].name,
                        graph_vars["accuracy"].name,
                        graph_vars["num_seqs"].name,
                    ])

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                # NOTE: this rebinds graph_vars, so when training is also
                # enabled the code below uses the test graph's variables.
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config)

        test_prog = test_prog.clone(for_test=True)

    def _evaluate_on(input_file, phase):
        # Run one un-shuffled pass over `input_file` through the test program.
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                input_file,
                batch_size=args.batch_size,
                epoch=1,
                shuffle=False))
        evaluate(exe, test_prog, test_pyreader, graph_vars, phase)

    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(
                exe,
                args.init_checkpoint,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(
                exe,
                args.init_pretraining_params,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
    else:
        # Evaluation only: init_checkpoint was validated at the top.
        init_checkpoint(
            exe,
            args.init_checkpoint,
            main_program=startup_prog,
            use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            loss_name=graph_vars["loss"].name,
            exec_strategy=exec_strategy,
            main_program=train_program)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    if args.do_train:
        train_pyreader.start()
        steps = 0
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr

        if args.save_log and args.log_path:
            # Refuse to overwrite an existing log file.
            if os.path.exists(args.log_path):
                raise FileExistsError("Logging file already exists!")
            with open(args.log_path, 'w') as logfile:
                logfile.write('%s\n' % time.asctime())
            print('Writing logs into %s' % args.log_path)

        time_begin = time.time()
        while True:
            try:
                steps += 1
                if steps % args.skip_steps != 0:
                    # Plain training step, nothing fetched.
                    train_exe.run(fetch_list=[])
                else:
                    # Every skip_steps-th step also reports metrics.
                    outputs = evaluate(train_exe, train_program, train_pyreader,
                                       graph_vars, "train")

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)

                    current_example, current_epoch = reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("epoch: %d, progress: %d/%d, step: %d, "
                          "ave loss: %.4f, micro_f1: %.4f, micro_p: %.4f, micro_r: %.4f, "
                          "speed: %f steps/s" %
                          (current_epoch, current_example, num_train_examples, steps,
                           outputs["loss"], outputs["micro_f"], outputs["micro_p"], outputs["micro_r"],
                           args.skip_steps / used_time))

                    # TODO: complete logging function
                    # TODO: print more useful metrics: f1/p/r instead of acc
                    if args.save_log and args.log_path:
                        with open(args.log_path, 'a') as logfile:
                            logfile.write("epoch: %d, progress: %d/%d, step: %d, "
                          "ave loss: %.4f, ave_acc: %.4f, micro_f1: %.4f, micro_p: %.4f, micro_r: %.4f, "
                          "speed: %f steps/s\n" %
                          (current_epoch, current_example, num_train_examples, steps,
                           outputs["loss"], outputs["accuracy"], outputs["micro_f"], outputs["micro_p"], outputs["micro_r"],
                           args.skip_steps / used_time))

                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)

                if steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        _evaluate_on(args.dev_set, "dev")
                    # evaluate test set
                    if args.do_test:
                        _evaluate_on(args.test_set, "test")
            except fluid.core.EOFException:
                # The reader is exhausted: save a last checkpoint and stop.
                save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on dev set
    if args.do_val:
        print("Final validation result:")
        _evaluate_on(args.dev_set, "dev")

    # final eval on test set
    if args.do_test:
        print("Final test result:")
        _evaluate_on(args.test_set, "test")
コード例 #7
0
ファイル: run_classifier.py プロジェクト: xuruiwen/ERNIE
def main(args):
    """Fine-tune, evaluate and/or predict with an ERNIE classifier/regressor.

    Depending on ``args.do_train``, ``args.do_val`` and ``args.do_test`` this
    builds a training program and/or a test program, optionally prepares the
    nccl2 distributed environment, initialises parameters, runs the training
    loop (periodic logging, checkpointing, evaluation and prediction) and
    finishes with a final evaluation/prediction. If ``args.diagnostic`` is
    set, predictions for that file are written to ``args.diagnostic_save``.

    Args:
        args: Argument namespace holding the run configuration (paths,
            batch size, schedule settings and the ``do_*`` flags).

    Raises:
        ValueError: If none of ``do_train``/``do_val``/``do_test`` is set, if
            ``do_test`` is set without ``test_save``, or if training is
            disabled and ``args.init_checkpoint`` is empty.
    """
    # Validate the flag combination first so a bad invocation fails before
    # the config, device and reader are set up. Explicit raises (rather than
    # `assert`) keep the checks alive under `python -O`.
    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")
    if args.do_test and args.test_save is None:
        raise ValueError(
            "args 'test_save' should be set when `do_test` is True.")
    if not args.do_train and not args.init_checkpoint:
        raise ValueError("args 'init_checkpoint' should be set if "
                         "only doing validation or testing!")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        dev_list = fluid.cuda_places()
        place = dev_list[0]
        dev_count = len(dev_list)
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    reader = task_reader.ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed,
        tokenizer=args.tokenizer,
        is_classify=args.is_classify,
        is_regression=args.is_regression,
        for_cn=args.for_cn,
        task_id=args.task_id)

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            dev_count=dev_count,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        # With in_tokens the batch size is divided by max_seq_len before it
        # is used as a per-step example count.
        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        log.info("Device count: %d" % dev_count)
        log.info("Num train examples: %d" % num_train_examples)
        log.info("Max train steps: %d" % max_train_steps)
        log.info("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()
        if args.random_seed is not None and args.enable_ce:
            train_program.random_seed = args.random_seed

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config,
                    is_classify=args.is_classify,
                    is_regression=args.is_regression)
                # The second return value (loss scaling) is not used here.
                scheduled_lr, _ = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            log.info("Theoretical memory usage in training: %.3f - %.3f %s" %
                     (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                # NOTE: this rebinds graph_vars, so when training is also
                # enabled the code below uses the test graph's variables.
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config,
                    is_classify=args.is_classify,
                    is_regression=args.is_regression)

        test_prog = test_prog.clone(for_test=True)

    # Single-process defaults; overwritten below for distributed runs.
    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    if args.is_distributed:
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)

        log.info("worker_endpoints:{} trainers_num:{} current_endpoint:{} "
                 "trainer_id:{}".format(worker_endpoints, trainers_num,
                                        current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        t = fluid.DistributeTranspiler(config=config)
        t.transpile(
            trainer_id,
            trainers=worker_endpoints_env,
            current_endpoint=current_endpoint,
            program=train_program if args.do_train else test_prog,
            startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            log.warning(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(
                exe,
                args.init_checkpoint,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(
                exe,
                args.init_pretraining_params,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
    else:
        # Evaluation only: init_checkpoint was validated at the top.
        init_checkpoint(
            exe,
            args.init_checkpoint,
            main_program=startup_prog,
            use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            loss_name=graph_vars["loss"].name,
            exec_strategy=exec_strategy,
            main_program=train_program,
            num_trainers=nccl2_num_trainers,
            trainer_id=nccl2_trainer_id)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    test_exe = exe
    if args.do_val or args.do_test:
        if args.use_multi_gpu_test:
            test_exe = fluid.ParallelExecutor(
                use_cuda=args.use_cuda,
                main_program=test_prog,
                share_vars_from=train_exe)

    # Defined outside the training branch because the final evaluation
    # below reads them even when no training was done.
    steps = 0
    current_epoch = 0

    if args.do_train:
        train_pyreader.start()
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr

        ce_info = []
        time_begin = time.time()
        last_epoch = 0
        while True:
            try:
                steps += 1
                if steps % args.skip_steps != 0:
                    # Plain training step, nothing fetched.
                    train_exe.run(fetch_list=[])
                else:
                    # Every skip_steps-th step also reports metrics.
                    outputs = evaluate(
                        train_exe,
                        train_program,
                        train_pyreader,
                        graph_vars,
                        "train",
                        metric=args.metric,
                        is_classify=args.is_classify,
                        is_regression=args.is_regression)

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        log.info(verbose)

                    current_example, current_epoch = reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin

                    if args.is_classify:
                        log.info(
                            "epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                            "ave acc: %f, speed: %f steps/s" %
                            (current_epoch, current_example, num_train_examples,
                             steps, outputs["loss"], outputs["accuracy"],
                             args.skip_steps / used_time))
                        ce_info.append(
                            [outputs["loss"], outputs["accuracy"], used_time])
                    if args.is_regression:
                        log.info(
                            "epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                            " speed: %f steps/s" %
                            (current_epoch, current_example, num_train_examples,
                             steps, outputs["loss"],
                             args.skip_steps / used_time))
                    time_begin = time.time()

                # Only trainer 0 checkpoints and evaluates.
                if nccl2_trainer_id == 0:
                    if steps % args.save_steps == 0:
                        save_path = os.path.join(args.checkpoints,
                                                 "step_" + str(steps))
                        fluid.io.save_persistables(exe, save_path, train_program)

                    # Evaluate on the step schedule and whenever the reported
                    # epoch changed since the previous iteration.
                    if steps % args.validation_steps == 0 or last_epoch != current_epoch:
                        # evaluate dev set
                        if args.do_val:
                            evaluate_wrapper(args, reader, exe, test_prog,
                                             test_pyreader, graph_vars,
                                             current_epoch, steps)

                        if args.do_test:
                            predict_wrapper(args, reader, exe, test_prog,
                                            test_pyreader, graph_vars,
                                            current_epoch, steps)

                last_epoch = current_epoch

            except fluid.core.EOFException:
                # The reader is exhausted: save a last checkpoint and stop.
                save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break
        if args.enable_ce:
            card_num = get_cards()
            ce_loss = 0
            ce_acc = 0
            ce_time = 0
            try:
                # The second-to-last record is reported (reason not visible
                # here -- TODO confirm); fewer than two records falls through
                # to the defaults above.
                ce_loss = ce_info[-2][0]
                ce_acc = ce_info[-2][1]
                ce_time = ce_info[-2][2]
            except IndexError:
                log.info("ce info error")
            log.info("kpis\ttrain_duration_card%s\t%s" % (card_num, ce_time))
            log.info("kpis\ttrain_loss_card%s\t%f" % (card_num, ce_loss))
            log.info("kpis\ttrain_acc_card%s\t%f" % (card_num, ce_acc))

    # final eval on dev set
    if args.do_val:
        evaluate_wrapper(args, reader, exe, test_prog, test_pyreader,
                         graph_vars, current_epoch, steps)

    # final eval on test set
    if args.do_test:
        predict_wrapper(args, reader, exe, test_prog, test_pyreader, graph_vars,
                        current_epoch, steps)

    # final eval on diagnostic, hack for glue-ax
    # NOTE: this relies on test_pyreader/test_prog, which are only built when
    # do_val or do_test is enabled.
    if args.diagnostic:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                args.diagnostic,
                batch_size=args.batch_size,
                epoch=1,
                dev_count=1,
                shuffle=False))

        log.info("Final diagnostic")
        qids, preds, probs = predict(
            test_exe,
            test_prog,
            test_pyreader,
            graph_vars,
            is_classify=args.is_classify,
            is_regression=args.is_regression)
        assert len(qids) == len(preds), '{} v.s. {}'.format(
            len(qids), len(preds))
        with open(args.diagnostic_save, 'w') as f:
            for qid, pred, prob in zip(qids, preds, probs):
                f.write('{}\t{}\t{}\n'.format(qid, pred, prob))

        log.info("Done final diagnostic, saving to {}".format(
            args.diagnostic_save))
コード例 #8
0
def main(args):
    """Fine-tune and/or evaluate an ERNIE classifier as configured by ``args``.

    Steps, in order:
      1. Load the ERNIE config, pick the device and build a ClassifyReader.
      2. Build the train program (model + optimizer) when ``args.do_train``
         and the test program when ``args.do_val`` or ``args.do_test``.
      3. Run the startup program, then load ``args.init_checkpoint`` or
         ``args.init_pretraining_params``.
      4. When training, loop until the reader raises ``EOFException``. Every
         ``args.skip_steps`` steps the train metrics are fetched; if the train
         accuracy went up, ``evaluate_wrapper`` is called, and if its result
         went up too, the persistables are saved and ``predict_wrapper`` is
         called.
      5. When ``args.diagnostic`` is set, run ``predict`` on it and write the
         results to ``args.diagnostic_save``.

    Args:
        args: Parsed option namespace. Only the attributes read in the body
            are required.

    Raises:
        ValueError: If none of ``do_train``/``do_val``/``do_test`` is set, or
            if only validation/testing is requested without
            ``args.init_checkpoint``.
        AssertionError: If ``args.do_test`` is set while ``args.test_save`` is
            None, or if the diagnostic prediction returns a different number
            of ids and predictions.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    # Device selection: on GPU the device index comes from the
    # FLAGS_selected_gpus environment variable (default '0'); on CPU the
    # device count comes from CPU_NUM, falling back to the machine's CPU count.
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ClassifyReader(vocab_path=args.vocab_path,
                                        label_map_config=args.label_map_config,
                                        max_seq_len=args.max_seq_len,
                                        do_lower_case=args.do_lower_case,
                                        in_tokens=args.in_tokens,
                                        random_seed=args.random_seed,
                                        tokenizer=args.tokenizer,
                                        is_classify=args.is_classify,
                                        is_regression=args.is_regression,
                                        for_cn=args.for_cn,
                                        task_id=args.task_id)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    if args.do_test:
        assert args.test_save is not None
    # A single startup program is shared by the train and test programs below.
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    # Prediction batch size defaults to the training batch size.
    if args.predict_batch_size is None:
        args.predict_batch_size = args.batch_size
    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            dev_count=dev_count,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        # With in_tokens the batch size is divided by max_seq_len before being
        # used as a per-step example count (presumably batch_size is then
        # expressed in tokens -- TODO confirm against the reader).
        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()
        # NOTE: the string literal below is disabled code kept as a no-op
        # expression statement; it does not seed train_program.
        """
        if args.random_seed is not None and args.enable_ce:
            train_program.random_seed = args.random_seed
        """
        # Build the training graph and attach the optimizer to its loss.
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config,
                    is_classify=args.is_classify,
                    is_regression=args.is_regression)
                # loss_scaling is returned but not used anywhere in this
                # function.
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                # NOTE(review): this rebinds graph_vars, so every later use
                # (including the ParallelExecutor loss_name and the training
                # loop) sees the test graph's dict when do_train is also set.
                # Presumably the variable names coincide because both graphs
                # are built under a fresh unique_name.guard() -- confirm.
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config,
                    is_classify=args.is_classify,
                    is_regression=args.is_regression)

        test_prog = test_prog.clone(for_test=True)
    # Trainer count and id are fixed to a single trainer in this script.
    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    exe.run(startup_prog)

    # Parameter loading. When training, init_checkpoint takes precedence over
    # init_pretraining_params; when only evaluating, init_checkpoint is
    # mandatory.
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog,
                            use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(exe,
                                    args.init_pretraining_params,
                                    main_program=startup_prog,
                                    use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=startup_prog,
                        use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                           loss_name=graph_vars["loss"].name,
                                           exec_strategy=exec_strategy,
                                           main_program=train_program,
                                           num_trainers=nccl2_num_trainers,
                                           trainer_id=nccl2_trainer_id)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    # Evaluation uses the plain executor unless multi-GPU testing is requested,
    # in which case a ParallelExecutor sharing variables with train_exe is used
    # (train_exe is None when not training).
    test_exe = exe
    if args.do_val or args.do_test:
        if args.use_multi_gpu_test:
            test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                              main_program=test_prog,
                                              share_vars_from=train_exe)

    # Initial values for the non-training case; both are reset when training.
    steps = 10000
    current_epoch = 1
    if args.do_train:
        train_pyreader.start()
        steps = 0
        # Expose the scheduled learning rate through graph_vars only when
        # warmup is active.
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr

        # ce_info is appended to below but never read in this function.
        ce_info = []
        time_begin = time.time()
        last_epoch = 0
        current_epoch = 0
        # Starting thresholds: evaluate_wrapper is only triggered once train
        # accuracy exceeds 0.90 (or 0.95), and a checkpoint is only saved once
        # the returned eval accuracy exceeds 0.80.
        previous_eval_acc = 0.80
        previous_train_acc = 0.90
        while True:
            try:
                steps += 1
                # Run a plain training step, except on every skip_steps-th
                # step, where `evaluate` is called with the train executor and
                # the "train" phase to obtain the metrics.
                if steps % args.skip_steps != 0:
                    train_exe.run(fetch_list=[])
                else:
                    outputs = evaluate(train_exe,
                                       train_program,
                                       train_pyreader,
                                       graph_vars,
                                       "train",
                                       metric=args.metric,
                                       is_classify=args.is_classify,
                                       is_regression=args.is_regression)
                    acc = outputs["accuracy"]
                    if acc > previous_train_acc or acc > 0.95:
                        print(
                            "previous train accuracy is %f and current train accuracy is %f "
                            % (previous_train_acc, acc))
                        previous_train_acc = acc
                        # NOTE(review): test_prog and test_pyreader are only
                        # bound when do_val or do_test is set; with do_train
                        # alone this call would raise NameError -- confirm
                        # the intended flag combination.
                        eval_acc = evaluate_wrapper(args, reader, exe,
                                                    test_prog, test_pyreader,
                                                    graph_vars, current_epoch,
                                                    steps)
                        print(
                            "previous evaluate accuracy is %f and current evaluate accuracy is %f "
                            % (previous_eval_acc, eval_acc))
                        if eval_acc > previous_eval_acc:
                            previous_eval_acc = eval_acc
                            # The checkpoint directory suffix is the part of
                            # str(eval_acc) after the decimal point.
                            save_path = os.path.join(
                                args.checkpoints,
                                "evalacc_" + str(eval_acc).split('.')[1])
                            fluid.io.save_persistables(exe, save_path,
                                                       train_program)
                            # The same "evalacc_<digits>" tag is passed as
                            # `steps` to predict_wrapper.
                            predict_wrapper(args,
                                            reader,
                                            exe,
                                            test_prog,
                                            test_pyreader,
                                            graph_vars,
                                            current_epoch,
                                            steps="evalacc_" +
                                            str(eval_acc).split('.')[1])
                            print(
                                "predict and save model!!!!!!!!!!!!!!!!!!!!!!!!!! in %s"
                                % (save_path))
                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)

                    current_example, current_epoch = reader.get_train_progress(
                    )
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print(
                        "epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                        "ave acc: %f, speed: %f steps/s" %
                        (current_epoch, current_example, num_train_examples,
                         steps, outputs["loss"], outputs["accuracy"],
                         args.skip_steps / used_time))
                    ce_info.append(
                        [outputs["loss"], outputs["accuracy"], used_time])

                    time_begin = time.time()

                # Disabled: periodic checkpointing every save_steps.
                # if steps % args.save_steps == 0:
                #     save_path = os.path.join(args.checkpoints,
                #                              "step_" + str(steps))
                #     fluid.io.save_persistables(exe, save_path, train_program)

                # Disabled: periodic dev/test evaluation every validation_steps
                # or at each epoch change.
                # if steps % args.validation_steps == 0 or last_epoch != current_epoch:
                #     # evaluate dev set
                #     if args.do_val:
                #         ret=evaluate_wrapper(args, reader, exe, test_prog,
                #                          test_pyreader, graph_vars,
                #                          current_epoch, steps)

                #     if args.do_test:
                #         predict_wrapper(args, reader, exe,
                #                 test_prog, test_pyreader, graph_vars,
                #                 current_epoch, steps)

                # last_epoch is kept in sync but nothing active reads it now
                # that the block above is disabled.
                if last_epoch != current_epoch:
                    last_epoch = current_epoch

            except fluid.core.EOFException:
                # The reader is exhausted: save a final "step_<N>" checkpoint,
                # reset the reader and leave the training loop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # Disabled: final eval on dev set
    # if args.do_val:
    #     evaluate_wrapper(args, reader, exe, test_prog, test_pyreader,
    #                      graph_vars, current_epoch, steps)

    # Disabled: final eval on test set. The reset of `steps` below is still
    # active but nothing after it reads the value.
    steps = 0
    # if args.do_test:
    #     current_epoch = 0
    #     predict_wrapper(args, reader, exe, test_prog, test_pyreader, graph_vars,
    #                     current_epoch, steps)

    # final eval on diagnostic, hack for glue-ax
    # NOTE(review): test_pyreader/test_prog are only bound when do_val or
    # do_test is set -- confirm diagnostic is never used with do_train alone.
    if args.diagnostic:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(args.diagnostic,
                                  batch_size=args.batch_size,
                                  epoch=1,
                                  dev_count=1,
                                  shuffle=False))

        print("Final diagnostic")
        qids, preds, probs = predict(test_exe,
                                     test_prog,
                                     test_pyreader,
                                     graph_vars,
                                     is_classify=args.is_classify,
                                     is_regression=args.is_regression)
        assert len(qids) == len(preds), '{} v.s. {}'.format(
            len(qids), len(preds))
        # One tab-separated line per example: id, prediction, probabilities.
        with open(args.diagnostic_save, 'w') as f:
            for id, s, p in zip(qids, preds, probs):
                f.write('{}\t{}\t{}\n'.format(id, s, p))

        print("Done final diagnostic, saving to {}".format(
            args.diagnostic_save))
コード例 #9
0
def main(args):
    """Run ERNIE classification inference and print one result per example.

    Builds the prediction graph, loads parameters from
    ``args.init_checkpoint``, feeds ``args.predict_set`` through the model
    once (no shuffling) and prints each fetched probability row prefixed by
    its running index.

    Args:
        args: Parsed option namespace. Reads ``ernie_config_path``,
            ``vocab_path``, ``label_map_config``, ``max_seq_len``,
            ``do_lower_case``, ``use_cuda``, ``init_checkpoint``,
            ``predict_set`` and ``batch_size``; it is also passed whole to
            ``create_model``.

    Raises:
        ValueError: If ``args.init_checkpoint`` is not set.
    """
    # Fail fast: without a checkpoint there is nothing to predict with, so
    # do not bother building the graph or running the startup program first.
    if not args.init_checkpoint:
        raise ValueError("args 'init_checkpoint' should be set for prediction!")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            # The third return value (feed target names) is not needed here.
            predict_pyreader, probs, _feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    # Single device only: GPU 0 when CUDA is requested, otherwise the CPU.
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    init_pretraining_params(exe, args.init_checkpoint, predict_prog)

    # Prediction runs on its own executor bound to the same place.
    predict_exe = fluid.Executor(place)

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    predict_pyreader.decorate_tensor_provider(predict_data_generator)

    predict_pyreader.start()
    all_results = []
    # Run until the reader signals exhaustion through EOFException.
    while True:
        try:
            results = predict_exe.run(program=predict_prog,
                                      fetch_list=[probs.name])
            all_results.extend(results[0])
        except fluid.core.EOFException:
            predict_pyreader.reset()
            break

    np.set_printoptions(precision=4, suppress=True)
    print("-------------- prediction results --------------")
    for index, result in enumerate(all_results):
        print('{}\t{}'.format(index, result))
コード例 #10
0
def main(args):
    """main"""
    model_config = UNIMOConfig(args.unimo_config_path)
    model_config.print_config()

    gpu_id = 0
    gpus = fluid.core.get_cuda_device_count()
    if args.is_distributed and os.getenv("FLAGS_selected_gpus") is not None:
        gpu_list = os.getenv("FLAGS_selected_gpus").split(",")
        gpus = len(gpu_list)
        gpu_id = int(gpu_list[0])

    if args.use_cuda:
        place = fluid.CUDAPlace(gpu_id)
        dev_count = gpus
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    tokenizer = GptBpeTokenizer(vocab_file=args.unimo_vocab_file,
                                encoder_json_file=args.encoder_json_file,
                                vocab_bpe_file=args.vocab_bpe_file,
                                do_lower_case=args.do_lower_case)

    data_reader = ClassifyReader(tokenizer, args)

    if not (args.do_train or args.do_val or args.do_val_hard \
            or args.do_test or args.do_test_hard or args.do_diagnostic):
        raise ValueError("For args `do_train`, `do_val`, `do_val_hard`, `do_test`," \
                " `do_test_hard` and `do_diagnostic`, at least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        trainers_num = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
        train_data_generator = data_reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            dev_count=trainers_num,
            shuffle=True,
            phase="train")

        num_train_examples = data_reader.get_num_examples(args.train_set)

        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // trainers_num
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // trainers_num

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d, gpu_id: %d" % (dev_count, gpu_id))
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args, pyreader_name='train_reader', config=model_config)
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    beta1=args.beta1,
                    beta2=args.beta2,
                    epsilon=args.epsilon)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val or args.do_val_hard or args.do_test or args.do_test_hard \
            or args.do_pred or args.do_pred_hard or args.do_diagnostic:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, graph_vars = create_model(
                    args, pyreader_name='test_reader', config=model_config)

        test_prog = test_prog.clone(for_test=True)

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    print("args.is_distributed:", args.is_distributed)
    if args.is_distributed:
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)

        print("worker_endpoints:{} trainers_num:{} current_endpoint:{} \
              trainer_id:{}".format(worker_endpoints, trainers_num,
                                    current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        if args.nccl_comm_num > 1:
            config.nccl_comm_num = args.nccl_comm_num
        if args.use_hierarchical_allreduce and trainers_num > args.hierarchical_allreduce_inter_nranks:
            config.use_hierarchical_allreduce = args.use_hierarchical_allreduce
            config.hierarchical_allreduce_inter_nranks = args.hierarchical_allreduce_inter_nranks

            assert config.hierarchical_allreduce_inter_nranks > 1
            assert trainers_num % config.hierarchical_allreduce_inter_nranks == 0

            config.hierarchical_allreduce_exter_nranks = \
                trainers_num / config.hierarchical_allreduce_inter_nranks

        t = fluid.DistributeTranspiler(config=config)
        t.transpile(trainer_id,
                    trainers=worker_endpoints_env,
                    current_endpoint=current_endpoint,
                    program=train_program if args.do_train else test_prog,
                    startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=train_program)
        elif args.init_pretraining_params:
            init_pretraining_params(exe,
                                    args.init_pretraining_params,
                                    main_program=train_program)
    elif args.do_val or args.do_val_hard or args.do_test or args.do_test_hard \
            or args.do_pred or args.do_pred_hard or args.do_diagnostic:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(exe, args.init_checkpoint, main_program=startup_prog)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                           loss_name=graph_vars["loss"].name,
                                           exec_strategy=exec_strategy,
                                           main_program=train_program,
                                           num_trainers=nccl2_num_trainers,
                                           trainer_id=nccl2_trainer_id)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    test_exe = exe
    if args.do_val or args.do_val_hard or args.do_test or args.do_test_hard \
            or args.do_pred or args.do_pred_hard or args.do_diagnostic:
        if args.use_multi_gpu_test:
            test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                              main_program=test_prog,
                                              share_vars_from=train_exe)

    dev_ret_history = []  # (steps, key_eval, eval)
    dev_hard_ret_history = []  # (steps, key_eval, eval)
    test_ret_history = []  # (steps, key_eval, eval)
    test_hard_ret_history = []  # (steps, key_eval, eval)
    if args.do_train:
        train_pyreader.start()
        steps = 0
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr

        time_begin = time.time()
        skip_steps = args.skip_steps
        while True:
            try:
                steps += 1
                if steps % skip_steps == 0:
                    train_fetch_list = [
                        graph_vars["loss"].name, graph_vars["accuracy"].name,
                        graph_vars["num_seqs"].name
                    ]
                    if "learning_rate" in graph_vars:
                        train_fetch_list.append(
                            graph_vars["learning_rate"].name)
                    res = train_exe.run(fetch_list=train_fetch_list)

                    outputs = {"loss": np.mean(res[0])}
                    if "learning_rate" in graph_vars:
                        outputs["learning_rate"] = float(res[3][0])

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)

                    current_example, current_epoch = data_reader.get_train_progress(
                    )
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("%s - epoch: %d, progress: %d/%d, step: %d, ave loss: %f, speed: %f steps/s" % \
                          (get_time(), current_epoch, current_example, num_train_examples, steps, \
                          outputs["loss"], args.skip_steps / used_time))
                    time_begin = time.time()
                else:
                    train_exe.run(fetch_list=[])

                if nccl2_trainer_id == 0:
                    if steps % args.save_steps == 0 and args.save_checkpoints:
                        save_path = os.path.join(args.checkpoints,
                                                 "step_" + str(steps))
                        fluid.io.save_persistables(exe, save_path,
                                                   train_program)

                    if steps % args.validation_steps == 0:
                        # evaluate dev set
                        if args.do_val:
                            test_pyreader.decorate_tensor_provider(
                                data_reader.data_generator(
                                    args.dev_set,
                                    batch_size=args.batch_size,
                                    epoch=1,
                                    dev_count=1,
                                    shuffle=False))
                            outputs = evaluate(args, test_exe, test_prog,
                                               test_pyreader, graph_vars,
                                               "dev")
                            dev_ret_history.append(
                                (steps, outputs['key_eval'],
                                 outputs[outputs['key_eval']]))

                        # evaluate dev_hard set
                        if args.do_val_hard:
                            test_pyreader.decorate_tensor_provider(
                                data_reader.data_generator(
                                    args.dev_hard_set,
                                    batch_size=args.batch_size,
                                    epoch=1,
                                    dev_count=1,
                                    shuffle=False))
                            outputs = evaluate(args, test_exe, test_prog,
                                               test_pyreader, graph_vars,
                                               "dev_hard")
                            dev_hard_ret_history.append(
                                (steps, outputs['key_eval'],
                                 outputs[outputs['key_eval']]))

                        # evaluate test set
                        if args.do_test:
                            test_pyreader.decorate_tensor_provider(
                                data_reader.data_generator(
                                    args.test_set,
                                    batch_size=args.batch_size,
                                    epoch=1,
                                    dev_count=1,
                                    shuffle=False))
                            outputs = evaluate(args, test_exe, test_prog,
                                               test_pyreader, graph_vars,
                                               "test")
                            test_ret_history.append(
                                (steps, outputs['key_eval'],
                                 outputs[outputs['key_eval']]))

                        # evaluate test_hard set
                        if args.do_test_hard:
                            test_pyreader.decorate_tensor_provider(
                                data_reader.data_generator(
                                    args.test_hard_set,
                                    batch_size=args.batch_size,
                                    epoch=1,
                                    dev_count=1,
                                    shuffle=False))
                            outputs = evaluate(args, test_exe, test_prog,
                                               test_pyreader, graph_vars,
                                               "test_hard")
                            test_hard_ret_history.append(
                                (steps, outputs['key_eval'],
                                 outputs[outputs['key_eval']]))

                        # pred diagnostic set
                        if args.do_diagnostic:
                            test_pyreader.decorate_tensor_provider(
                                data_reader.data_generator(
                                    args.diagnostic_set,
                                    batch_size=args.batch_size,
                                    epoch=1,
                                    dev_count=1,
                                    shuffle=False))
                            qids, preds, probs = predict(test_exe,
                                                         test_prog,
                                                         test_pyreader,
                                                         graph_vars,
                                                         dev_count=1)
                            save_path = args.pred_save + '.diagnostic.' + str(
                                steps) + '.txt'
                            print("testing {}, save to {}".format(
                                args.diagnostic_set, save_path))
                            with open(save_path, 'w') as f:
                                for id, s, p in zip(qids, preds, probs):
                                    f.write('{}\t{}\t{}\n'.format(id, s, p))

                        # pred test set
                        if args.do_pred:
                            test_pyreader.decorate_tensor_provider(
                                data_reader.data_generator(
                                    args.test_set,
                                    batch_size=args.batch_size,
                                    epoch=1,
                                    dev_count=1,
                                    shuffle=False))
                            qids, preds, probs = predict(test_exe,
                                                         test_prog,
                                                         test_pyreader,
                                                         graph_vars,
                                                         dev_count=1)
                            save_path = args.pred_save + '.test.' + str(
                                steps) + '.txt'
                            print("testing {}, save to {}".format(
                                args.test_set, save_path))
                            with open(save_path, 'w') as f:
                                for id, s, p in zip(qids, preds, probs):
                                    f.write('{}\t{}\t{}\n'.format(id, s, p))

                        # pred test hard set
                        if args.do_pred_hard:
                            test_pyreader.decorate_tensor_provider(
                                data_reader.data_generator(
                                    args.test_hard_set,
                                    batch_size=args.batch_size,
                                    epoch=1,
                                    dev_count=1,
                                    shuffle=False))
                            qids, preds, probs = predict(test_exe,
                                                         test_prog,
                                                         test_pyreader,
                                                         graph_vars,
                                                         dev_count=1)
                            save_path = args.pred_save + '.test_hard.' + str(
                                steps) + '.txt'
                            print("testing {}, save to {}".format(
                                args.test_hard_set, save_path))
                            with open(save_path, 'w') as f:
                                for id, s, p in zip(qids, preds, probs):
                                    f.write('{}\t{}\t{}\n'.format(id, s, p))

            except fluid.core.EOFException:
                if args.save_checkpoints:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    if nccl2_trainer_id == 0:
        # final pred on diagnostic set
        if args.do_diagnostic:
            test_pyreader.decorate_tensor_provider(
                data_reader.data_generator(args.diagnostic_set,
                                           batch_size=args.batch_size,
                                           epoch=1,
                                           dev_count=1,
                                           shuffle=False))
            qids, preds, probs = predict(test_exe,
                                         test_prog,
                                         test_pyreader,
                                         graph_vars,
                                         dev_count=1)
            save_path = args.pred_save + '.diagnostic.' + str(steps) + '.txt'
            print("testing {}, save to {}".format(args.diagnostic_set,
                                                  save_path))
            with open(save_path, 'w') as f:
                for id, s, p in zip(qids, preds, probs):
                    f.write('{}\t{}\t{}\n'.format(id, s, p))

        # final pred on test set
        if args.do_pred:
            test_pyreader.decorate_tensor_provider(
                data_reader.data_generator(args.test_set,
                                           batch_size=args.batch_size,
                                           epoch=1,
                                           dev_count=1,
                                           shuffle=False))
            qids, preds, probs = predict(test_exe,
                                         test_prog,
                                         test_pyreader,
                                         graph_vars,
                                         dev_count=1)
            save_path = args.pred_save + '.test.' + str(steps) + '.txt'
            print("testing {}, save to {}".format(args.test_set, save_path))
            with open(save_path, 'w') as f:
                for id, s, p in zip(qids, preds, probs):
                    f.write('{}\t{}\t{}\n'.format(id, s, p))

        # final pred on test_hard set
        if args.do_pred_hard:
            test_pyreader.decorate_tensor_provider(
                data_reader.data_generator(args.test_hard_set,
                                           batch_size=args.batch_size,
                                           epoch=1,
                                           dev_count=1,
                                           shuffle=False))
            qids, preds, probs = predict(test_exe,
                                         test_prog,
                                         test_pyreader,
                                         graph_vars,
                                         dev_count=1)
            save_path = args.pred_save + '.test_hard.' + str(steps) + '.txt'
            print("testing {}, save to {}".format(args.test_hard_set,
                                                  save_path))
            with open(save_path, 'w') as f:
                for id, s, p in zip(qids, preds, probs):
                    f.write('{}\t{}\t{}\n'.format(id, s, p))

        # final eval on test set
        if args.do_test:
            test_pyreader.decorate_tensor_provider(
                data_reader.data_generator(args.test_set,
                                           batch_size=args.batch_size,
                                           epoch=1,
                                           dev_count=1,
                                           shuffle=False))
            print("Final test result:")
            outputs = evaluate(args, test_exe, test_prog, test_pyreader,
                               graph_vars, "test")
            test_ret_history.append(
                (steps, outputs['key_eval'], outputs[outputs['key_eval']]))
            test_ret_history = sorted(test_ret_history,
                                      key=lambda a: a[2],
                                      reverse=True)
            print("Best testing result: step %d %s %f" %
                  (test_ret_history[0][0], test_ret_history[0][1],
                   test_ret_history[0][2]))

        # final eval on test hard set
        if args.do_test_hard:
            test_pyreader.decorate_tensor_provider(
                data_reader.data_generator(args.test_hard_set,
                                           batch_size=args.batch_size,
                                           epoch=1,
                                           dev_count=1,
                                           shuffle=False))
            print("Final test_hard result:")
            outputs = evaluate(args, test_exe, test_prog, test_pyreader,
                               graph_vars, "test_hard")
            test_hard_ret_history.append(
                (steps, outputs['key_eval'], outputs[outputs['key_eval']]))
            test_hard_ret_history = sorted(test_hard_ret_history,
                                           key=lambda a: a[2],
                                           reverse=True)
            print("Best testing hard result: step %d %s %f" %
                  (test_hard_ret_history[0][0], test_hard_ret_history[0][1],
                   test_hard_ret_history[0][2]))

        # final eval on dev set
        if args.do_val:
            test_pyreader.decorate_tensor_provider(
                data_reader.data_generator(args.dev_set,
                                           batch_size=args.batch_size,
                                           epoch=1,
                                           dev_count=1,
                                           shuffle=False))
            print("Final validation result:")
            outputs = evaluate(args, test_exe, test_prog, test_pyreader,
                               graph_vars, "dev")
            dev_ret_history.append(
                (steps, outputs['key_eval'], outputs[outputs['key_eval']]))
            dev_ret_history = sorted(dev_ret_history,
                                     key=lambda a: a[2],
                                     reverse=True)
            print("Best validation result: step %d %s %f" %
                  (dev_ret_history[0][0], dev_ret_history[0][1],
                   dev_ret_history[0][2]))

        # final eval on dev hard set
        if args.do_val_hard:
            test_pyreader.decorate_tensor_provider(
                data_reader.data_generator(args.dev_hard_set,
                                           batch_size=args.batch_size,
                                           epoch=1,
                                           dev_count=1,
                                           shuffle=False))
            print("Final validation_hard result:")
            outputs = evaluate(args, test_exe, test_prog, test_pyreader,
                               graph_vars, "dev_hard")
            dev_hard_ret_history.append(
                (steps, outputs['key_eval'], outputs[outputs['key_eval']]))
            dev_hard_ret_history = sorted(dev_hard_ret_history,
                                          key=lambda a: a[2],
                                          reverse=True)
            print("Best validation_hard result: step %d %s %f" %
                  (dev_hard_ret_history[0][0], dev_hard_ret_history[0][1],
                   dev_hard_ret_history[0][2]))
コード例 #11
0
# ERNIE parameter names outside the encoder stack, mapped to the HuggingFace
# BERT key they are exported under. The boolean says whether the array is
# transposed before being stored (done for fc weight matrices only).
_ERNIE_FIXED_NAME_MAP = {
    "word_embedding": ("embeddings.word_embeddings.weight", False),
    "pos_embedding": ("embeddings.position_embeddings.weight", False),
    "sent_embedding": ("embeddings.token_type_embeddings.weight", False),
    "pre_encoder_layer_norm_scale": ("embeddings.LayerNorm.gamma", False),
    "pre_encoder_layer_norm_bias": ("embeddings.LayerNorm.beta", False),
    "pooled_fc.w_0": ("pooler.dense.weight", True),
    "pooled_fc.b_0": ("pooler.dense.bias", False),
}


def _map_encoder_layer_name(name):
    """Translate one ``encoder_layer_*`` ERNIE name to its HuggingFace key.

    Args:
        name: ERNIE variable name starting with ``encoder_layer_``, e.g.
            ``encoder_layer_0_multi_head_att_query_fc.w_0``.

    Returns:
        ``(huggingface_name, transpose)`` where ``transpose`` is True for fc
        weight matrices, or ``None`` when the name does not follow one of the
        recognised patterns (so the caller can skip it).
    """
    splits = name.split(".")
    # fc parameters are "<prefix>.w_0" / "<prefix>.b_0"; layer-norm
    # parameters carry no "." suffix at all.
    suffix = splits[1] if len(splits) == 2 else None
    parts = splits[0].split("_")
    if len(parts) < 4:
        return None

    layer = "encoder.layer." + parts[2]
    kind = parts[3]

    if kind == "post":
        # Post-attention / post-ffn layer norm: scale -> gamma, bias -> beta.
        if len(parts) < 5:
            return None
        new_suffix = ".gamma" if name.endswith("scale") else ".beta"
        if parts[4] == "att":
            return layer + ".attention.output.LayerNorm" + new_suffix, False
        if parts[4] == "ffn":
            return layer + ".output.LayerNorm" + new_suffix, False
        return None

    if kind not in ("multi", "ffn") or suffix is None:
        # Unknown sub-module, or an fc parameter without a single
        # ".w_0"/".b_0" suffix: nothing reliable to map it to.
        return None

    is_weight = suffix == "w_0"
    new_suffix = ".weight" if is_weight else ".bias"

    if kind == "multi":
        # encoder_layer_<i>_multi_head_att_<query|key|value|output>_fc
        if len(parts) < 7:
            return None
        if parts[6] == "output":
            target = layer + ".attention.output.dense"
        else:
            target = layer + ".attention.self." + parts[6]
    else:
        # encoder_layer_<i>_ffn_fc_<0|1>
        if len(parts) < 6:
            return None
        if parts[5] == "0":
            target = layer + ".intermediate.dense"
        elif parts[5] == "1":
            target = layer + ".output.dense"
        else:
            return None
    return target + new_suffix, is_weight


def gen_huggingface_bert_model(params_path):
    """Load ERNIE parameters with Paddle and re-key them HuggingFace-BERT style.

    Builds the ERNIE classifier graph on CPU, loads the pretrained parameters
    found under ``params_path``, then walks the startup program's variables and
    stores each recognised one under its HuggingFace BERT name.

    Variables whose names match none of the known patterns are skipped. The
    previous implementation reused the key computed for an earlier variable in
    that situation, which could silently overwrite an already converted
    parameter (or raise ``NameError`` on the first iteration).

    Note: this mutates the module-level ``args`` object (max_seq_len, use_fp16,
    num_labels, loss_scaling) and extends ``sys.path`` with ``./LARK/ERNIE``.

    Args:
        params_path: Directory handed to ``init_pretraining_params`` as the
            location of the pretrained ERNIE parameters.

    Returns:
        ``collections.OrderedDict`` mapping HuggingFace-style parameter names
        to float32 numpy arrays, in the order the variables are listed by the
        startup program. fc weight matrices are stored transposed.
    """
    import paddle.fluid as fluid
    import sys
    ernie_root = "./LARK/ERNIE"
    # Avoid piling up duplicate entries when called more than once.
    if ernie_root not in sys.path:
        sys.path.append(ernie_root)
    from model.ernie import ErnieConfig
    from finetune.classifier import create_model
    from utils.init import init_pretraining_params

    ernie_config = ErnieConfig("./LARK/ERNIE/config/ernie_config.json")
    startup_prog = fluid.default_startup_program()
    test_prog = fluid.Program()

    # Settings read by create_model; `args` is the module-level namespace.
    args.max_seq_len = 512
    args.use_fp16 = False
    args.num_labels = 2
    args.loss_scaling = 1.0
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            _, _ = create_model(args,
                                pyreader_name="test",
                                ernie_config=ernie_config)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    init_pretraining_params(exe, params_path, main_program=startup_prog)
    sc = fluid.global_scope()
    new_model = collections.OrderedDict()
    for each in startup_prog.list_vars():
        name = each.name
        if name == "test_reader":
            continue

        if name in _ERNIE_FIXED_NAME_MAP:
            mapped = _ERNIE_FIXED_NAME_MAP[name]
        elif name.startswith("encoder_layer_"):
            mapped = _map_encoder_layer_name(name)
        else:
            mapped = None
        if mapped is None:
            # Not a parameter we export; do not even fetch its tensor.
            continue

        hf_name, needs_transpose = mapped
        fc_w = np.array(sc.find_var(name).get_tensor(), dtype=np.float32)
        if needs_transpose:
            # Presumably Paddle fc weights are (in, out) while the target
            # layout is (out, in) -- TODO confirm against the consumer.
            fc_w = fc_w.transpose()
        new_model[hf_name] = fc_w
    return new_model
コード例 #12
0
        raise ValueError("args 'init_checkpoint' should be set if"
                         "only doing validation or testing!")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    startup_prog = fluid.Program()
    test_program = fluid.Program()

    with fluid.program_guard(test_program, startup_prog):
        with fluid.unique_name.guard():
            _, _ = create_model(args,
                                pyreader_name='test_reader',
                                ernie_config=ernie_config,
                                is_classify=True)

    exe.run(startup_prog)

    init_pretraining_params(
        exe,
        args.init_checkpoint,
        main_program=test_program,
        #main_program=startup_prog,
        use_fp16=args.use_fp16)

    name2params = {}
    prefix = args.init_checkpoint
    for var in startup_prog.list_vars():
        path = os.path.join(prefix, var.name)