# NOTE: standard-library/Paddle imports below are inferred from usage; project-local
# modules (creator, utils, model_utils, reader, evaluate, Teacher, ErnieConfig) are
# assumed to be provided by the surrounding repository.
import os
import time
import multiprocessing
from collections import namedtuple

import numpy as np
import paddle.fluid as fluid


def do_eval(args):
    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    test_program = fluid.Program()
    with fluid.program_guard(test_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            test_ret = creator.create_ernie_model(args, ernie_config)
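    # clone(for_test=True) produces an inference-only copy of the program,
    # e.g. dropout ops switch to test-mode behavior.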
    test_program = test_program.clone(for_test=True)

    pyreader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=test_ret['feed_list'],
        model="ernie",
        place=place,
        mode='test')

    print('program startup')

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    print('program loading')
    # load model
    if not args.init_checkpoint:
        raise ValueError("args 'init_checkpoint' should be set if only doing test or infer!")
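    # utils.init_checkpoint is a project helper; it is expected to restore the
    # persistable parameters saved under init_checkpoint into test_program.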
    utils.init_checkpoint(exe, args.init_checkpoint, test_program)

    evaluate(exe, test_program, pyreader, test_ret)
def do_infer(args):
    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    # define network and reader
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            infer_ret = creator.create_ernie_model(args,
                                                   ernie_config,
                                                   is_prediction=False)
    infer_program = infer_program.clone(for_test=True)
    print(args.test_data)
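    # iterable=True makes the pyreader yield feed dicts that can be passed straight
    # to exe.run; return_reader=True also exposes the underlying reader so its
    # vocab and label_map can be used below to decode ids back to text.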
    pyreader, reader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=infer_ret['feed_list'],
        model="ernie",
        place=place,
        iterable=True,
        return_reader=True,
        for_test=True)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set if only doing test or infer!"
        )
    utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    # build id->word and id->label lookup tables for decoding the predictions
    id2word_dict = dict([(str(word_id), word)
                         for word, word_id in reader.vocab.items()])
    id2label_dict = dict([(str(label_id), label)
                          for label, label_id in reader.label_map.items()])
    Dataset = namedtuple("Dataset", ["id2word_dict", "id2label_dict"])
    dataset = Dataset(id2word_dict, id2label_dict)
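    # utils.parse_result is assumed to read dataset.id2word_dict / dataset.id2label_dict
    # when mapping the fetched ids back to characters and tags.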

    # make prediction
    for data in pyreader():
        (words, crf_decode) = exe.run(
            infer_program,
            fetch_list=[infer_ret["words"], infer_ret["crf_decode"]],
            feed=data[0],
            return_numpy=False)
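        # return_numpy=False keeps the outputs as LoDTensors, preserving the LoD
        # (sequence-length) information needed to split the CRF paths per sentence.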
        # Note: input words are clipped if they are longer than args.max_seq_len.
        results = utils.parse_result(words, crf_decode, dataset)
        for sent, tags in results:
            result_list = [
                '(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)
            ]
            print(''.join(result_list))
def do_eval(args):
    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()
    print('ernie config') 
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    test_program = fluid.Program()
    print('test program') 
    with fluid.program_guard(test_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            test_ret = creator.create_ernie_model(args, ernie_config)
    test_program = test_program.clone(for_test=True)
    #print('create pyreader') 
    pyreader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=[ret.name for ret in test_ret['feed_list']],
        model="ernie",
        place=place,
        return_reader=True,
        mode='test')

    #data_inter = reader.data_generator(args.test_data, args.batch_size, 1, shuffle=False, phase="train")

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set if only doing test or infer!")
    model_utils.init_checkpoint(exe, args.init_checkpoint, test_program)
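    # Teacher is assumed to be paddleslim.pantheon's Teacher: with out_path=None it
    # serves knowledge online over args.out_port instead of dumping it to a file.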
    
    teacher = Teacher(out_path=None, out_port=int(args.out_port))
    teacher.start()
    print('run teacher......')
    
    test_ret["chunk_evaluator"].reset()
   
    reader_config = {"batch_generator": pyreader}
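    # Serve the CRF decode paths and sequence lengths produced by test_program as
    # distillation knowledge for a student process; 'times' sets how many passes
    # over the data are served.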

    teacher.start_knowledge_service(
        feed_list=[
            test_ret["words"].name, test_ret["sent_ids"].name,
            test_ret["pos_ids"].name, test_ret["input_mask"].name,
            test_ret["labels"].name, test_ret["seq_lens"].name
        ],
        schema={
            "crf_decode": test_ret["crf_decode"],
            "seq_lens": test_ret["seq_lens"]
        },
        program=test_program,
        reader_config=reader_config,
        exe=exe,
        times=10)
def do_train(args):
    """
    Main Function
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = 1
    else:
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if dev_count < args.cpu_num:
            print(
                "WARNING: This machine has only %d CPUs, which is less than the "
                "cpu_num you set. Changing cpu_num from %d to %d." %
                (dev_count, args.cpu_num, dev_count))
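        # CPU_NUM tells PaddlePaddle how many CPU places to use for
        # data-parallel execution on this machine.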
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_prog):
        with fluid.unique_name.guard():
            # user defined model based on ernie embeddings
            train_ret = creator.create_ernie_model(args, ernie_config)

            # ernie pyreader
            train_pyreader = creator.create_pyreader(
                args,
                file_name=args.train_data,
                feed_list=train_ret['feed_list'],
                model="ernie",
                place=place)

            test_program = train_program.clone(for_test=True)
            test_pyreader = creator.create_pyreader(
                args,
                file_name=args.test_data,
                feed_list=train_ret['feed_list'],
                model="ernie",
                place=place)

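            # clip gradients by global norm before the Adam update to
            # stabilize ERNIE fine-tuning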
            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate, grad_clip=clip)
            optimizer.minimize(train_ret["avg_cost"])

    lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
        program=train_program, batch_size=args.batch_size)
    print("Theoretical memory usage in training: %.3f - %.3f %s" %
          (lower_mem, upper_mem, unit))
    print("Device count: %d" % dev_count)

    exe.run(startup_prog)
    # load checkpoints
    if args.init_checkpoint and args.init_pretraining_params:
        print("WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
              "are both set! Only 'init_checkpoint' will take effect.")
    if args.init_checkpoint:
        utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)
    elif args.init_pretraining_params:
        utils.init_pretraining_params(exe, args.init_pretraining_params,
                                      startup_prog)

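    # Compile the program for execution; data parallelism (with_data_parallel)
    # is only applied in the multi-CPU case below.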
    if dev_count > 1 and not args.use_cuda:
        device = "GPU" if args.use_cuda else "CPU"
        print("%d %s are used to train model" % (dev_count, device))

        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        build_strategy = fluid.BuildStrategy()
        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    # start training
    steps = 0
    for epoch_id in range(args.epoch):
        for data in train_pyreader():
            steps += 1
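            # fetch the metric variables only on print steps to avoid
            # per-batch fetch overhead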
            if steps % args.print_steps == 0:
                fetch_list = [
                    train_ret["avg_cost"],
                    train_ret["precision"],
                    train_ret["recall"],
                    train_ret["f1_score"],
                ]
            else:
                fetch_list = []

            start_time = time.time()

            outputs = exe.run(program=compiled_prog,
                              feed=data[0],
                              fetch_list=fetch_list)
            end_time = time.time()
            if steps % args.print_steps == 0:
                loss, precision, recall, f1_score = [
                    np.mean(x) for x in outputs
                ]
                print(
                    "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f, "
                    "pyreader queue_size: %d " %
                    (steps, loss, precision, recall, f1_score,
                     end_time - start_time, train_pyreader.queue.size()))

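            # fluid.save writes the program's persistable parameters and the
            # optimizer state under the save_path prefix.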
            if steps % args.save_steps == 0:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(steps), "checkpoint")
                print("\tsaving model as %s" % (save_path))
                fluid.save(train_program, save_path)

            if steps % args.validation_steps == 0:
                evaluate(exe, test_program, test_pyreader, train_ret)

    save_path = os.path.join(args.model_save_dir, "step_" + str(steps),
                             "checkpoint")
    fluid.save(train_program, save_path)