コード例 #1
0
def do_eval(args):
    """Evaluate a trained LAC model on ``args.test_data``.

    Builds the test network in a dedicated program, creates a pyreader over
    the test set, restores weights from ``args.init_checkpoint`` (with the
    ``.pdckpt`` suffix) and runs ``test_process`` on the cloned test program.
    """
    dataset = reader.Dataset(args)

    # Build the evaluation network inside its own program so the default
    # main program is left untouched.
    test_program = fluid.Program()
    with fluid.program_guard(test_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            test_ret = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='test')
    test_program = test_program.clone(for_test=True)

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    pyreader = creator.create_pyreader(args, file_name=args.test_data,
                                       feed_list=test_ret['feed_list'],
                                       place=place,
                                       mode='lac',
                                       reader=dataset,
                                       iterable=True,
                                       for_test=True)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model; fail early with a clear message instead of crashing on
    # `None + '.pdckpt'` (the other eval/infer variants in this file do the
    # same check).
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set if only doing test or infer!")
    utils.init_checkpoint(exe, args.init_checkpoint + '.pdckpt', test_program)
    test_process(exe=exe,
                 program=test_program,
                 reader=pyreader,
                 test_ret=test_ret
                 )
コード例 #2
0
def do_eval(args):
    """Evaluate the ERNIE-based model on ``args.test_data``."""
    # Pick the execution device.
    if args.use_cuda:
        selected_gpu = int(os.getenv('FLAGS_selected_gpus', '0'))
        place = fluid.CUDAPlace(selected_gpu)
    else:
        place = fluid.CPUPlace()

    ernie_cfg = ErnieConfig(args.ernie_config_path)
    ernie_cfg.print_config()

    # Assemble the evaluation network in a dedicated program, then freeze
    # it for inference.
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            eval_ret = creator.create_ernie_model(args, ernie_cfg)
    eval_prog = eval_prog.clone(for_test=True)

    data_loader = creator.create_pyreader(args,
                                          file_name=args.test_data,
                                          feed_list=eval_ret['feed_list'],
                                          model="ernie",
                                          place=place,
                                          mode='test',)

    print('program startup')

    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())

    print('program loading')
    # Restore trained weights; evaluation is meaningless without them.
    if not args.init_checkpoint:
        raise ValueError("args 'init_checkpoint' should be set if only doing test or infer!")
    utils.init_checkpoint(executor, args.init_checkpoint, eval_prog)

    evaluate(executor, eval_prog, data_loader, eval_ret)
コード例 #3
0
def do_infer(args):
    """Run inference with an ERNIE-based tagger and print tagged sentences.

    Restores weights from ``args.init_checkpoint`` and, for every batch in
    ``args.test_data``, prints each sentence as a run of "(char, tag)" pairs.
    """
    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    # define network and reader
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            # NOTE(review): is_prediction=False in an inference path looks
            # odd — confirm against creator.create_ernie_model's contract.
            infer_ret = creator.create_ernie_model(args,
                                                   ernie_config,
                                                   is_prediction=False)
    infer_program = infer_program.clone(for_test=True)
    print(args.test_data)
    # return_reader=True also yields the raw reader object so its
    # vocab/label maps can be used to build the id->token dictionaries below.
    pyreader, reader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=infer_ret['feed_list'],
        mode="ernie",
        place=place,
        iterable=True,
        return_reader=True,
        for_test=True)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set if only doing test or infer!"
        )
    utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    # create dict: id -> token and id -> label, packed into the small
    # namespace that utils.parse_result expects.
    id2word_dict = dict([(str(word_id), word)
                         for word, word_id in reader.vocab.items()])
    id2label_dict = dict([(str(label_id), label)
                          for label, label_id in reader.label_map.items()])
    Dataset = namedtuple("Dataset", ["id2word_dict", "id2label_dict"])
    dataset = Dataset(id2word_dict, id2label_dict)

    # make prediction
    for data in pyreader():
        # return_numpy=False keeps LoD tensors so parse_result can recover
        # per-sentence boundaries.
        (words, crf_decode) = exe.run(
            infer_program,
            fetch_list=[infer_ret["words"], infer_ret["crf_decode"]],
            feed=data[0],
            return_numpy=False)
        # User should notice that words had been clipped if long than args.max_seq_len
        results = utils.parse_result(words, crf_decode, dataset)
        for sent, tags in results:
            result_list = [
                '(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)
            ]
            print(''.join(result_list))
コード例 #4
0
def do_eval(args):
    """Serve a trained ERNIE tagger as a distillation "teacher".

    Builds the test network, restores weights from ``args.init_checkpoint``
    and starts a knowledge service on ``args.out_port`` that emits
    ``crf_decode``/``seq_lens`` for incoming batches.
    """
    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()
    print('ernie config')
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    test_program = fluid.Program()
    print('test program')
    with fluid.program_guard(test_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            test_ret = creator.create_ernie_model(args, ernie_config)
    test_program = test_program.clone(for_test=True)
    # Feeds are passed by variable *name* (strings) here, unlike the other
    # examples in this file which pass the variables themselves.
    pyreader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=[ret.name for ret in test_ret['feed_list']],
        model="ernie",
        place=place,
        return_reader=True,
        mode='test')

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set if only doing test or infer!")
    model_utils.init_checkpoint(exe, args.init_checkpoint, test_program)

    # Launch the teacher service and keep it running to answer students.
    teacher = Teacher(out_path=None, out_port=int(args.out_port))
    teacher.start()
    print('run teacher......')

    test_ret["chunk_evaluator"].reset()

    # NOTE(review): create_pyreader was called with return_reader=True, so
    # `pyreader` may actually be a (loader, reader) tuple — confirm before
    # relying on it as a batch generator here.
    reader_config = {"batch_generator": pyreader}

    teacher.start_knowledge_service(
            feed_list=[test_ret["words"].name, test_ret["sent_ids"].name, test_ret["pos_ids"].name, test_ret["input_mask"].name, test_ret["labels"].name, test_ret["seq_lens"].name],
            schema={"crf_decode":test_ret["crf_decode"],"seq_lens":test_ret["seq_lens"]},
            program=test_program,
            reader_config=reader_config,
            exe=exe,
            times=10)
コード例 #5
0
def do_eval(args):
    """Dump the test set into a flat binary file for offline evaluation.

    Reads ``args.test_data`` through the LAC pyreader and writes, in order:
    sentence count (int64), total word count (int64), per-sentence lengths
    (uint64), all word ids (int64), all target ids (int64) to
    ``args.save_bin_path``.
    """
    # Placeholder LoD tensors used only to build the pyreader feed list.
    words = fluid.data(name='words',
                       shape=[None, 1],
                       dtype='int64',
                       lod_level=1)
    targets = fluid.data(name='targets',
                         shape=[None, 1],
                         dtype='int64',
                         lod_level=1)
    dataset = reader.Dataset(args)
    pyreader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=[words, targets],
        place=fluid.CPUPlace(),
        model='lac',
        reader=dataset,
        mode='test')

    lods = []       # per-sentence token counts
    words = []      # flattened word ids (rebinds the placeholder name)
    targets = []    # flattened target ids
    sum_words = 0
    sum_sentences = 0

    for data in pyreader():
        print(len(data[0]['words'].lod()[0]))
        print(data[0]['words'])
        # With batch size 1, lod()[0][1] is this sentence's length.
        new_lod = data[0]['words'].lod()[0][1]
        new_words = np.array(data[0]['words'])
        new_targets = np.array(data[0]['targets'])
        assert new_lod == len(new_words)
        assert new_lod == len(new_targets)
        lods.append(new_lod)
        words.extend(new_words.flatten())
        targets.extend(new_targets.flatten())
        sum_sentences = sum_sentences + 1
        sum_words = sum_words + new_lod

    # Context manager guarantees the file is closed even if a write fails
    # (the original leaked the handle on any exception).
    with open(args.save_bin_path, "w+b") as bin_file:
        bin_file.write(np.array(int(sum_sentences)).astype('int64').tobytes())
        bin_file.write(np.array(int(sum_words)).astype('int64').tobytes())
        bin_file.write(np.array(lods).astype('uint64').tobytes())
        bin_file.write(np.array(words).astype('int64').tobytes())
        bin_file.write(np.array(targets).astype('int64').tobytes())
    print(
        "SUCCESS!! Binary file saved at ",
        args.save_bin_path,
    )
コード例 #6
0
def do_infer(args):
    """Run inference with a trained LAC model and write tagged output."""
    dataset = reader.Dataset(args)

    # Build the inference network in its own program, then freeze it.
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():

            infer_ret = creator.create_model(args,
                                             dataset.vocab_size,
                                             dataset.num_labels,
                                             mode='infer')
    infer_program = infer_program.clone(for_test=True)

    # Select the execution device.
    if args.use_cuda:
        gpu_id = int(os.getenv('FLAGS_selected_gpus', '0'))
        place = fluid.CUDAPlace(gpu_id)
    else:
        place = fluid.CPUPlace()

    pyreader = creator.create_pyreader(args,
                                       file_name=args.infer_data,
                                       feed_list=infer_ret['feed_list'],
                                       place=place,
                                       model='lac',
                                       reader=dataset,
                                       mode='infer')

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    result = infer_process(
        exe=exe,
        program=infer_program,
        reader=pyreader,
        fetch_vars=[infer_ret['words'], infer_ret['crf_decode']],
        dataset=dataset)

    # Dump each sentence as a run of "(char, tag)" pairs, one per line.
    with open('../processed.txt', 'w') as out:
        for sent, tags in result:
            pairs = ['(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)]
            out.write(''.join(pairs) + '\n')
コード例 #7
0
def do_train(args):
    """Train the ERNIE-based sequence labeling model.

    Builds the train/test programs, optionally restores a checkpoint or
    pretraining params, then runs the epoch loop with periodic logging,
    checkpoint saving and validation.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    # Device selection: a single GPU, or up to args.cpu_num CPU threads.
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = 1
    else:
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if (dev_count < args.cpu_num):
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_prog):
        with fluid.unique_name.guard():
            # user defined model based on ernie embeddings
            train_ret = creator.create_ernie_model(args, ernie_config)

            # ernie pyreader
            train_pyreader = creator.create_pyreader(
                args,
                file_name=args.train_data,
                feed_list=train_ret['feed_list'],
                model="ernie",
                place=place)

            # Clone for evaluation before the optimizer mutates the graph.
            test_program = train_program.clone(for_test=True)
            test_pyreader = creator.create_pyreader(
                args,
                file_name=args.test_data,
                feed_list=train_ret['feed_list'],
                model="ernie",
                place=place)

            # Global-norm gradient clipping stabilizes fine-tuning.
            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate, grad_clip=clip)
            optimizer.minimize(train_ret["avg_cost"])

    lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
        program=train_program, batch_size=args.batch_size)
    print("Theoretical memory usage in training: %.3f - %.3f %s" %
          (lower_mem, upper_mem, unit))
    print("Device count: %d" % dev_count)

    exe.run(startup_prog)
    # load checkpoints
    if args.init_checkpoint and args.init_pretraining_params:
        print("WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
              "both are set! Only arg 'init_checkpoint' is made valid.")
    if args.init_checkpoint:
        utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)
    elif args.init_pretraining_params:
        utils.init_pretraining_params(exe, args.init_pretraining_params,
                                      startup_prog)

    # NOTE(review): dev_count > 1 only happens on CPU above, so the
    # `and not args.use_cuda` clause is redundant and `device` is always
    # "CPU" inside this branch.
    if dev_count > 1 and not args.use_cuda:
        device = "GPU" if args.use_cuda else "CPU"
        print("%d %s are used to train model" % (dev_count, device))

        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        build_strategy = fluid.BuildStrategy()
        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    # start training
    steps = 0
    for epoch_id in range(args.epoch):
        for data in train_pyreader():
            steps += 1
            # Fetch metrics only on print steps to keep training fast.
            if steps % args.print_steps == 0:
                fetch_list = [
                    train_ret["avg_cost"],
                    train_ret["precision"],
                    train_ret["recall"],
                    train_ret["f1_score"],
                ]
            else:
                fetch_list = []

            start_time = time.time()

            outputs = exe.run(program=compiled_prog,
                              feed=data[0],
                              fetch_list=fetch_list)
            end_time = time.time()
            if steps % args.print_steps == 0:
                loss, precision, recall, f1_score = [
                    np.mean(x) for x in outputs
                ]
                print(
                    "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f, "
                    "pyreader queue_size: %d " %
                    (steps, loss, precision, recall, f1_score,
                     end_time - start_time, train_pyreader.queue.size()))

            if steps % args.save_steps == 0:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(steps), "checkpoint")
                print("\tsaving model as %s" % (save_path))
                fluid.save(train_program, save_path)

            if steps % args.validation_steps == 0:
                evaluate(exe, test_program, test_pyreader, train_ret)

    # Final checkpoint after the last step.
    save_path = os.path.join(args.model_save_dir, "step_" + str(steps),
                             "checkpoint")
    fluid.save(train_program, save_path)
コード例 #8
0
ファイル: train.py プロジェクト: guoshengCS/rnn-benchmark
def do_train(args):
    """Train the LAC model, validating periodically and keeping the best.

    Builds the train/test programs on the default main program, runs the
    epoch loop with periodic logging, validation (saving the best-F1
    snapshot) and checkpointing, and finally emits CE kpi lines when
    ``args.enable_ce`` is set.
    """
    best_score = -999
    train_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    dataset = reader.Dataset(args)
    with fluid.program_guard(train_program, startup_program):
        # Fix seeds for reproducibility / CE comparability.
        train_program.random_seed = args.random_seed
        startup_program.random_seed = args.random_seed

        with fluid.unique_name.guard():
            train_ret = creator.create_model(args,
                                             dataset.vocab_size,
                                             dataset.num_labels,
                                             mode='train')
            # Clone for evaluation before the optimizer mutates the graph.
            test_program = train_program.clone(for_test=True)

            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate)
            optimizer.minimize(train_ret["avg_cost"])

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if (dev_count < args.cpu_num):
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    train_reader = creator.create_pyreader(args,
                                           file_name=args.train_data,
                                           feed_list=train_ret['feed_list'],
                                           place=place,
                                           mode='lac',
                                           reader=dataset,
                                           iterable=True)

    test_reader = creator.create_pyreader(args,
                                          file_name=args.test_data,
                                          feed_list=train_ret['feed_list'],
                                          place=place,
                                          mode='lac',
                                          reader=dataset,
                                          iterable=True,
                                          for_test=True)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    if args.init_checkpoint:
        utils.init_checkpoint(exe, args.init_checkpoint, train_program)
    if dev_count > 1:
        device = "GPU" if args.use_cuda else "CPU"
        print("%d %s are used to train model" % (dev_count, device))
        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        # exec_strategy.num_threads = dev_count * 6
        build_strategy = fluid.compiler.BuildStrategy()
        # build_strategy.enable_inplace = True

        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    # start training
    num_train_examples = dataset.get_num_examples(args.train_data)
    max_train_steps = args.epoch * num_train_examples // args.batch_size
    print("Num train examples: %d" % num_train_examples)
    print("Max train steps: %d" % max_train_steps)

    ce_info = []
    step = 0
    print_start_time = time.time()
    for epoch_id in range(args.epoch):
        ce_time = 0
        for data in train_reader():
            # this is for minimizing the fetching op, saving the training speed.
            if step % args.print_steps == 0:
                fetch_list = [
                    train_ret["avg_cost"], train_ret["precision"],
                    train_ret["recall"], train_ret["f1_score"]
                ]
            else:
                fetch_list = []

            outputs = exe.run(
                compiled_prog,
                fetch_list=fetch_list,
                feed=data[0],
            )

            if step % args.print_steps == 0:
                print_end_time = time.time()
                avg_cost, precision, recall, f1_score = [
                    np.mean(x) for x in outputs
                ]

                print(
                    "[train] step = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f"
                    % (step, avg_cost, precision, recall, f1_score,
                       print_end_time - print_start_time))
                print_start_time = time.time()

            if step % args.validation_steps == 0:
                valid_end_time = time.time()
                test_f1 = test_process(exe, test_program, test_reader,
                                       train_ret)
                valid_start_time = time.time()

                if test_f1 > best_score:
                    best_score = test_f1
                    # NOTE(review): "best_mode" looks like a typo for
                    # "best_model"; kept as-is since downstream tooling may
                    # depend on this exact path.
                    save_path = os.path.join(args.model_save_dir, "best_mode")
                    fluid.io.save_persistables(exe, save_path, train_program)

                # NOTE(review): valid_end_time is taken *before* the
                # validation run and valid_start_time after it, so this
                # delta is negative — confirm the intended semantics.
                ce_time += valid_end_time - valid_start_time
                ce_info.append(
                    [ce_time, avg_cost, precision, recall, f1_score])

            # save checkpoints
            if step % args.save_steps == 0 and step != 0:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(step))
                fluid.io.save_persistables(exe, save_path, train_program)
            step += 1

    if args.enable_ce:
        card_num = get_cards()
        ce_cost = 0
        ce_f1 = 0
        ce_p = 0
        ce_r = 0
        ce_time = 0
        # ce_info needs at least two entries; catch only IndexError instead
        # of the original bare except, which would also swallow
        # KeyboardInterrupt/SystemExit.
        try:
            ce_time = ce_info[-2][0]
            ce_cost = ce_info[-2][1]
            ce_p = ce_info[-2][2]
            ce_r = ce_info[-2][3]
            ce_f1 = ce_info[-2][4]
        except IndexError:
            print("ce info error")
        print("kpis\teach_step_duration_card%s\t%s" % (card_num, ce_time))
        print("kpis\ttrain_cost_card%s\t%f" % (card_num, ce_cost))
        print("kpis\ttrain_precision_card%s\t%f" % (card_num, ce_p))
        print("kpis\ttrain_recall_card%s\t%f" % (card_num, ce_r))
        print("kpis\ttrain_f1_card%s\t%f" % (card_num, ce_f1))