Beispiel #1
0
def do_eval(args):
    """
    Build the test-mode model, restore its checkpoint and run ``test_process``.

    The weights are loaded from ``args.init_checkpoint + '.pdckpt'`` and the
    evaluation batches are read from ``args.test_data``.
    """
    corpus = reader.Dataset(args)

    # Define the network in a fresh program that shares the default startup
    # program, then freeze it for testing.
    eval_program = fluid.Program()
    with fluid.program_guard(eval_program, fluid.default_startup_program()), \
            fluid.unique_name.guard():
        model_vars = creator.create_model(args,
                                          corpus.vocab_size,
                                          corpus.num_labels,
                                          mode='test')
    eval_program = eval_program.clone(for_test=True)

    # init executor
    place = (fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
             if args.use_cuda else fluid.CPUPlace())

    batch_reader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=model_vars['feed_list'],
        place=place,
        mode='lac',
        reader=corpus,
        iterable=True,
        for_test=True)

    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())

    # load model
    checkpoint_path = args.init_checkpoint + '.pdckpt'
    utils.init_checkpoint(executor, checkpoint_path, eval_program)

    test_process(exe=executor,
                 program=eval_program,
                 reader=batch_reader,
                 test_ret=model_vars)
Beispiel #2
0
def do_infer(args):
    """
    Run the ERNIE model over ``args.test_data`` and print the tagged sentences.

    Every parsed sentence is printed on one line as a run of ``(token, tag)``
    pairs.

    Raises:
        ValueError: if ``args.init_checkpoint`` is not set.
    """
    # init executor
    place = (fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
             if args.use_cuda else fluid.CPUPlace())

    # define network and reader
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, fluid.default_startup_program()), \
            fluid.unique_name.guard():
        infer_ret = creator.create_ernie_model(args,
                                               ernie_config,
                                               is_prediction=False)
    infer_program = infer_program.clone(for_test=True)
    print(args.test_data)
    pyreader, data_reader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=infer_ret['feed_list'],
        mode="ernie",
        place=place,
        iterable=True,
        return_reader=True,
        for_test=True)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set if only doing test or infer!"
        )
    utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    # Reverse lookup tables (stringified id -> token / label), wrapped in the
    # small record that utils.parse_result receives as its third argument.
    id2word_dict = {
        str(word_id): word
        for word, word_id in data_reader.vocab.items()
    }
    id2label_dict = {
        str(label_id): label
        for label, label_id in data_reader.label_map.items()
    }
    LookupTables = namedtuple("Dataset", ["id2word_dict", "id2label_dict"])
    dataset = LookupTables(id2word_dict, id2label_dict)

    # make prediction
    for batch in pyreader():
        words, crf_decode = exe.run(
            infer_program,
            fetch_list=[infer_ret["words"], infer_ret["crf_decode"]],
            feed=batch[0],
            return_numpy=False)
        # Note for users: words are clipped when a sentence is longer than
        # args.max_seq_len.
        for sent, tags in utils.parse_result(words, crf_decode, dataset):
            print(''.join('(%s, %s)' % pair for pair in zip(sent, tags)))
def do_eval(args):
    """
    Restore an ERNIE checkpoint and evaluate it on ``args.test_data``.

    Raises:
        ValueError: if ``args.init_checkpoint`` is not set.
    """
    # init executor
    place = (fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
             if args.use_cuda else fluid.CPUPlace())

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    # Build the network in its own program, then freeze it for testing.
    eval_program = fluid.Program()
    with fluid.program_guard(eval_program, fluid.default_startup_program()), \
            fluid.unique_name.guard():
        model_vars = creator.create_ernie_model(args, ernie_config)
    eval_program = eval_program.clone(for_test=True)

    batch_reader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=model_vars['feed_list'],
        model="ernie",
        place=place,
        mode='test')

    print('program startup')

    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())

    print('program loading')
    # load model
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set if only doing test or infer!")
    utils.init_checkpoint(executor, args.init_checkpoint, eval_program)

    evaluate(executor, eval_program, batch_reader, model_vars)
Beispiel #4
0
def save_inference_model(args):
    """
    Export the infer-mode network and its checkpointed weights.

    Weights are restored from ``args.init_checkpoint + '.pdckpt'``; the
    exported model is written to ``args.inference_save_dir`` with ``'words'``
    as the only feed and ``crf_decode`` as the fetch target.
    """
    # model definition
    place = (fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
             if args.use_cuda else fluid.CPUPlace())
    corpus = reader.Dataset(args)
    export_program = fluid.Program()
    with fluid.program_guard(export_program, fluid.default_startup_program()), \
            fluid.unique_name.guard():
        model_vars = creator.create_model(args,
                                          corpus.vocab_size,
                                          corpus.num_labels,
                                          mode='infer')
        # the test-mode clone is taken while the guards are still active
        export_program = export_program.clone(for_test=True)

    # load pretrain check point
    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())
    checkpoint_path = args.init_checkpoint + '.pdckpt'
    utils.init_checkpoint(executor, checkpoint_path, export_program)

    fluid.io.save_inference_model(
        args.inference_save_dir,
        feeded_var_names=['words'],
        target_vars=model_vars['crf_decode'],
        executor=executor,
        main_program=export_program,
        model_filename='model.pdmodel',
        params_filename='params.pdparams',
    )
Beispiel #5
0
def do_save_inference_model(args):
    """
    Export the prediction network and its checkpointed weights.

    The network is built with ``create_model(..., is_prediction=True)`` in the
    default main/startup programs, weights are restored from
    ``args.init_checkpoint`` and the result is written to
    ``args.inference_model_dir``.

    Args:
        args: parsed options; reads ``use_cuda``, ``num_labels``,
            ``init_checkpoint`` and ``inference_model_dir``.

    Raises:
        ValueError: if ``args.init_checkpoint`` is not set.
    """
    # Validate explicitly and up front: an ``assert`` disappears under
    # ``python -O``, which would export a model with untrained weights.
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set to save an inference model!")

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

    test_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            # the loader returned first is not needed for exporting
            _infer_loader, probs, feed_target_names = create_model(
                args, num_labels=args.num_labels, is_prediction=True)

    test_prog = test_prog.clone(for_test=True)
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    utils.init_checkpoint(exe, args.init_checkpoint, test_prog)

    fluid.io.save_inference_model(
        args.inference_model_dir,
        feeded_var_names=feed_target_names,
        target_vars=[probs],
        executor=exe,
        main_program=test_prog,
        model_filename="model.pdmodel",
        params_filename="params.pdparams")

    print("save inference model at %s" % (args.inference_model_dir))
def do_save_inference_model(args):
    """
    Export the ERNIE-based prediction network and its checkpointed weights.

    The encoder comes from ``ernie_encoder_with_paddle_hub`` when
    ``args.use_paddle_hub`` is set, otherwise from ``ernie_encoder``. Weights
    are restored from ``args.init_checkpoint`` and the model is written to
    ``args.inference_model_dir`` with the five ERNIE inputs (``src_ids``,
    ``sent_ids``, ``pos_ids``, ``input_mask``, ``seq_lens``) as feeds.

    Args:
        args: parsed options; reads ``ernie_config_path``, ``use_cuda``,
            ``use_paddle_hub``, ``max_seq_len``, ``init_checkpoint`` and
            ``inference_model_dir``.

    Raises:
        ValueError: if ``args.init_checkpoint`` is not set.
    """
    # Validate explicitly and up front: an ``assert`` disappears under
    # ``python -O``, which would export a model with untrained weights.
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set to save an inference model!")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    test_prog = fluid.Program()
    startup_prog = fluid.Program()

    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            # the reader returned first is not needed for exporting
            _infer_pyreader, ernie_inputs, labels = ernie_pyreader(
                args, pyreader_name="infer_reader")

            if args.use_paddle_hub:
                embeddings = ernie_encoder_with_paddle_hub(
                    ernie_inputs, args.max_seq_len)
            else:
                embeddings = ernie_encoder(ernie_inputs,
                                           ernie_config=ernie_config)

            probs = create_model(args,
                                 embeddings,
                                 labels=labels,
                                 is_prediction=True)
    test_prog = test_prog.clone(for_test=True)
    exe.run(startup_prog)

    utils.init_checkpoint(exe, args.init_checkpoint, test_prog)

    feed_names = [
        ernie_inputs[key].name
        for key in ("src_ids", "sent_ids", "pos_ids", "input_mask",
                    "seq_lens")
    ]
    fluid.io.save_inference_model(args.inference_model_dir,
                                  feeded_var_names=feed_names,
                                  target_vars=[probs],
                                  executor=exe,
                                  main_program=test_prog,
                                  model_filename="model.pdmodel",
                                  params_filename="params.pdparams")

    print("save inference model at %s" % (args.inference_model_dir))
Beispiel #7
0
 def load(self, model_dir, is_checkpoint=False):
     """
     Restore variables of ``self.program`` from ``model_dir``.

     ``init_checkpoint`` is used when ``is_checkpoint`` is true, otherwise
     ``init_pretraining_params``; both run on ``self.exe``.
     """
     # TODO: support dygraph.
     restore = init_checkpoint if is_checkpoint else init_pretraining_params
     restore(self.exe, model_dir, self.program)
Beispiel #8
0
    def _build_programs(self):
        """
        Build the static-graph programs and initialise their variables.

        With ``self.run_infer`` set, only ``infer_program`` is built; otherwise
        ``train_program`` and its test-mode clone ``eval_program`` are built.
        Afterwards the startup program is executed and, when configured,
        pretraining parameters and/or a checkpoint are loaded into the program
        that was just built. Only use in static graph mode.
        """
        infer_only = self.run_infer
        self.startup_program = fluid.Program()

        if infer_only:
            # build infer program
            self.infer_program = fluid.Program()
            with fluid.program_guard(self.infer_program, self.startup_program), \
                    fluid.unique_name.guard():
                inputs = self._get_feed_dict(is_infer=True)
                self.infer_feed_dict = inputs
                outputs = self.forward(inputs, is_infer=True)
                self.infer_fetch_dict = self.infer(inputs, outputs)
            self.infer_program = self.infer_program.clone(for_test=True)
            built_program = self.infer_program
        else:
            # build train program
            self.train_program = fluid.Program()
            with fluid.program_guard(self.train_program, self.startup_program), \
                    fluid.unique_name.guard():
                inputs = self._get_feed_dict()
                self.feed_dict = inputs
                outputs = self.forward(inputs)
                metrics, statistics = self.get_metrics_and_statistics(
                    inputs, outputs)

                # build eval program: cloned before optimize() is called
                self.eval_program = self.train_program.clone(for_test=True)
                self.eval_fetch_dict = {**metrics, **statistics}

                self.optimize(metrics)
                self.train_fetch_dict = metrics
            built_program = self.train_program

        # Shared tail: run the startup program, then load whatever is configured.
        self.exe.run(self.startup_program)
        if self.init_pretraining_params is not None:
            init_pretraining_params(self.exe, self.init_pretraining_params,
                                    built_program)
        if self.init_checkpoint is not None:
            init_checkpoint(self.exe, self.init_checkpoint, built_program)
Beispiel #9
0
def do_infer(args):
    """
    Tag ``args.infer_data`` and write the result to ``../processed.txt``.

    Weights are restored from ``args.init_checkpoint``. Each output line holds
    one sentence as a run of ``(token, tag)`` pairs.
    """
    corpus = reader.Dataset(args)

    # Build the infer-mode network in its own program and freeze it.
    predict_program = fluid.Program()
    with fluid.program_guard(predict_program,
                             fluid.default_startup_program()), \
            fluid.unique_name.guard():
        model_vars = creator.create_model(args,
                                          corpus.vocab_size,
                                          corpus.num_labels,
                                          mode='infer')
    predict_program = predict_program.clone(for_test=True)

    place = (fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
             if args.use_cuda else fluid.CPUPlace())

    batch_reader = creator.create_pyreader(
        args,
        file_name=args.infer_data,
        feed_list=model_vars['feed_list'],
        place=place,
        model='lac',
        reader=corpus,
        mode='infer')

    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())

    # load model
    utils.init_checkpoint(executor, args.init_checkpoint, predict_program)

    tagged = infer_process(
        exe=executor,
        program=predict_program,
        reader=batch_reader,
        fetch_vars=[model_vars['words'], model_vars['crf_decode']],
        dataset=corpus)
    with open('../processed.txt', 'w') as out_file:
        for sent, tags in tagged:
            line = ''.join('(%s, %s)' % pair for pair in zip(sent, tags))
            out_file.write(line + '\n')
Beispiel #10
0
def do_infer(args):
    """
    Tag ``args.infer_data`` and print the result.

    Batches come from a ``fluid.io.PyReader`` fed by
    ``dataset.file_reader(..., mode='infer')``; weights are restored from
    ``args.init_checkpoint``. Each printed line holds one sentence as a run of
    ``(token, tag)`` pairs.
    """
    corpus = reader.Dataset(args)

    # Build the infer-mode network in its own program and freeze it.
    predict_program = fluid.Program()
    with fluid.program_guard(predict_program,
                             fluid.default_startup_program()), \
            fluid.unique_name.guard():
        model_vars = creator.create_model(args,
                                          corpus.vocab_size,
                                          corpus.num_labels,
                                          mode='infer')
    predict_program = predict_program.clone(for_test=True)

    place = (fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
             if args.use_cuda else fluid.CPUPlace())

    # The reader feeds only the 'words' variable, in batches of args.batch_size.
    batch_reader = fluid.io.PyReader(feed_list=[model_vars['words']],
                                     capacity=10,
                                     iterable=True,
                                     return_list=False)
    sample_source = corpus.file_reader(args.infer_data, mode='infer')
    batched_source = paddle.batch(sample_source, batch_size=args.batch_size)
    batch_reader.decorate_sample_list_generator(batched_source, places=place)

    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())

    # load model
    utils.init_checkpoint(executor, args.init_checkpoint, predict_program)

    tagged = infer_process(
        exe=executor,
        program=predict_program,
        reader=batch_reader,
        fetch_vars=[model_vars['words'], model_vars['crf_decode']],
        dataset=corpus)
    for sent, tags in tagged:
        print(''.join('(%s, %s)' % pair for pair in zip(sent, tags)))
def do_train(args):
    """
    Train the model built by ``creator.create_ernie_model``.

    Metrics are printed every ``args.print_steps`` steps, a checkpoint is
    saved every ``args.save_steps`` steps, ``evaluate`` runs every
    ``args.validation_steps`` steps, and one more checkpoint is saved after
    the last epoch.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    # Pick the device; on CPU also cap the worker count to the machine size.
    if not args.use_cuda:
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if dev_count < args.cpu_num:
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()
    else:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = 1

    exe = fluid.Executor(place)

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_prog), \
            fluid.unique_name.guard():
        # user defined model based on ernie embeddings
        train_ret = creator.create_ernie_model(args, ernie_config)

        # ernie pyreader
        train_pyreader = creator.create_pyreader(
            args,
            file_name=args.train_data,
            feed_list=train_ret['feed_list'],
            model="ernie",
            place=place)

        # the test program is cloned before the optimizer ops are appended
        test_program = train_program.clone(for_test=True)
        test_pyreader = creator.create_pyreader(
            args,
            file_name=args.test_data,
            feed_list=train_ret['feed_list'],
            model="ernie",
            place=place)

        grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
        adam = fluid.optimizer.Adam(learning_rate=args.base_learning_rate,
                                    grad_clip=grad_clip)
        adam.minimize(train_ret["avg_cost"])

    lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
        program=train_program, batch_size=args.batch_size)
    print("Theoretical memory usage in training: %.3f - %.3f %s" %
          (lower_mem, upper_mem, unit))
    print("Device count: %d" % dev_count)

    exe.run(startup_prog)
    # load checkpoints; a checkpoint takes priority over pretraining params
    if args.init_checkpoint and args.init_pretraining_params:
        print("WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
              "both are set! Only arg 'init_checkpoint' is made valid.")
    if args.init_checkpoint:
        utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)
    elif args.init_pretraining_params:
        utils.init_pretraining_params(exe, args.init_pretraining_params,
                                      startup_prog)

    if dev_count > 1 and not args.use_cuda:
        device = "GPU" if args.use_cuda else "CPU"
        print("%d %s are used to train model" % (dev_count, device))

        # multi cpu/gpu config
        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=fluid.BuildStrategy(),
                exec_strategy=fluid.ExecutionStrategy())
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    # start training
    metric_keys = ("avg_cost", "precision", "recall", "f1_score")
    steps = 0
    for _epoch in range(args.epoch):
        for batch in train_pyreader():
            steps += 1
            # fetch metrics only on the steps that print them
            report_step = steps % args.print_steps == 0
            fetch_list = ([train_ret[key] for key in metric_keys]
                          if report_step else [])

            start_time = time.time()
            outputs = exe.run(program=compiled_prog,
                              feed=batch[0],
                              fetch_list=fetch_list)
            end_time = time.time()

            if report_step:
                loss, precision, recall, f1_score = [
                    np.mean(value) for value in outputs
                ]
                print(
                    "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f, "
                    "pyreader queue_size: %d " %
                    (steps, loss, precision, recall, f1_score,
                     end_time - start_time, train_pyreader.queue.size()))

            if steps % args.save_steps == 0:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(steps), "checkpoint")
                print("\tsaving model as %s" % (save_path))
                fluid.save(train_program, save_path)

            if steps % args.validation_steps == 0:
                evaluate(exe, test_program, test_pyreader, train_ret)

    # final checkpoint after the last epoch
    final_path = os.path.join(args.model_save_dir, "step_" + str(steps),
                              "checkpoint")
    fluid.save(train_program, final_path)
Beispiel #12
0
def infer(conf_dict, args):
    """
    Run prediction and write one ``<input>\\t<prediction>`` line per example
    to ``args.infer_result_path``.

    In ``pairwise`` task mode the prediction is ``(score + 1) / 2``; otherwise
    it is the arg-max index of each output row.
    """
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    vocab = utils.load_vocab(args.vocab_path)
    simnet_process = reader.SimNetProcessor(args, vocab)

    startup_prog = fluid.Program()

    batch_data = fluid.io.batch(simnet_process.get_infer_reader,
                                args.batch_size,
                                drop_last=False)

    test_prog = fluid.Program()

    conf_dict['dict_size'] = len(vocab)

    net_class = utils.import_class("../shared_modules/models/matching",
                                   conf_dict["net"]["module_name"],
                                   conf_dict["net"]["class_name"])
    net = net_class(conf_dict)

    # Graph construction is the same for every task mode: the second value
    # returned by net.predict is the prediction that gets fetched.
    is_pairwise = args.task_mode == "pairwise"
    with fluid.program_guard(test_prog, startup_prog), \
            fluid.unique_name.guard():
        infer_loader, left, right = create_model(args, is_inference=True)
        _left_feat, pred = net.predict(left, right)
    test_prog = test_prog.clone(for_test=True)

    exe.run(startup_prog)

    utils.init_checkpoint(exe, args.init_checkpoint, main_program=test_prog)

    infer_loader.set_sample_list_generator(batch_data)

    logging.info("start test process ...")
    preds_list = []
    fetch_list = [pred.name]
    infer_loader.start()
    # Drain the loader; the end of the data is signalled by EOFException.
    while True:
        try:
            output = exe.run(program=test_prog, fetch_list=fetch_list)
            if is_pairwise:
                preds_list.extend(
                    str((item[0] + 1) / 2) for item in output[0])
            else:
                preds_list.extend(str(np.argmax(item)) for item in output[0])
        except fluid.core.EOFException:
            infer_loader.reset()
            break

    with io.open(args.infer_result_path, "w", encoding="utf8") as infer_file:
        for _data, _pred in zip(simnet_process.get_infer_data(), preds_list):
            infer_file.write(_data + "\t" + _pred + "\n")
    logging.info("infer result saved in %s" %
                 os.path.join(os.getcwd(), args.infer_result_path))
Beispiel #13
0
def test(conf_dict, args):
    """
    Evaluate a checkpoint on the test set and log its AUC.

    Per-example predictions are written to ``predictions.txt``. Accuracy is
    logged as well when ``args.compute_accuracy`` is set, and
    ``utils.get_result_file`` is called when ``args.verbose_result`` is set.
    """
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    vocab = utils.load_vocab(args.vocab_path)
    simnet_process = reader.SimNetProcessor(args, vocab)

    startup_prog = fluid.Program()

    batch_data = fluid.io.batch(simnet_process.get_reader("test"),
                                args.batch_size,
                                drop_last=False)
    test_prog = fluid.Program()

    conf_dict['dict_size'] = len(vocab)

    net_class = utils.import_class("../shared_modules/models/matching",
                                   conf_dict["net"]["module_name"],
                                   conf_dict["net"]["class_name"])
    net = net_class(conf_dict)

    metric = fluid.metrics.Auc(name="auc")

    with io.open("predictions.txt", "w", encoding="utf8") as predictions_file:
        # Graph construction is the same for every task mode: the second
        # value returned by net.predict is the prediction that gets fetched.
        is_pairwise = args.task_mode == "pairwise"
        with fluid.program_guard(test_prog, startup_prog), \
                fluid.unique_name.guard():
            test_loader, left, right = create_model(args, is_inference=True)
            _left_feat, pred = net.predict(left, right)
        test_prog = test_prog.clone(for_test=True)

        exe.run(startup_prog)

        utils.init_checkpoint(exe,
                              args.init_checkpoint,
                              main_program=test_prog)

        test_loader.set_sample_list_generator(batch_data)

        logging.info("start test process ...")
        test_loader.start()
        pred_list = []
        fetch_list = [pred.name]
        # Drain the loader; the end of the data is signalled by EOFException.
        while True:
            try:
                output = exe.run(program=test_prog, fetch_list=fetch_list)
                if is_pairwise:
                    pred_list.extend(float(item[0]) for item in output[0])
                    lines = u"\n".join(
                        str((item[0] + 1) / 2) for item in output[0])
                else:
                    pred_list.extend(output[0])
                    lines = u"\n".join(
                        str(np.argmax(item)) for item in output[0])
                predictions_file.write(lines + "\n")
            except fluid.core.EOFException:
                test_loader.reset()
                break

        if is_pairwise:
            # turn each raw score s into the two-column row [1 - p, p]
            # with p = (s + 1) / 2
            positive = (np.array(pred_list).reshape((-1, 1)) + 1) / 2
            pred_list = np.hstack((np.ones_like(positive) - positive,
                                   positive))
        else:
            pred_list = np.array(pred_list)
        labels = simnet_process.get_test_label()

        metric.update(pred_list, labels)
        if args.compute_accuracy:
            acc = utils.get_accuracy(pred_list, labels, args.task_mode,
                                     args.lamda)
            logging.info("AUC of test is %f, Accuracy of test is %f" %
                         (metric.eval(), acc))
        else:
            logging.info("AUC of test is %f" % metric.eval())

    if args.verbose_result:
        utils.get_result_file(args)
        logging.info("test result saved in %s" %
                     os.path.join(os.getcwd(), args.test_result_path))
Beispiel #14
0
def do_train(args):
    """
    Train the model built by ``creator.create_model(..., mode='train')``.

    Metrics are printed every ``args.print_steps`` steps. Every
    ``args.validation_steps`` steps the model is evaluated with
    ``test_process`` and saved under ``<model_save_dir>/best_mode`` when the
    returned F1 improves. Every ``args.save_steps`` steps (except step 0) a
    checkpoint is saved under ``<model_save_dir>/step_<n>``. With
    ``args.enable_ce`` set, KPI lines are printed at the end.

    Args:
        args: parsed options; reads, among others, ``random_seed``,
            ``use_cuda``, ``cpu_num``, ``train_data``, ``test_data``,
            ``init_checkpoint``, ``epoch``, ``batch_size``,
            ``base_learning_rate``, ``model_save_dir`` and ``enable_ce``.
    """
    best_score = -999
    train_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    dataset = reader.Dataset(args)
    with fluid.program_guard(train_program, startup_program):
        train_program.random_seed = args.random_seed
        startup_program.random_seed = args.random_seed

        with fluid.unique_name.guard():
            train_ret = creator.create_model(args,
                                             dataset.vocab_size,
                                             dataset.num_labels,
                                             mode='train')
            # the test program is cloned before the optimizer ops are appended
            test_program = train_program.clone(for_test=True)

            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate)
            optimizer.minimize(train_ret["avg_cost"])

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if dev_count < args.cpu_num:
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    train_reader = creator.create_pyreader(args,
                                           file_name=args.train_data,
                                           feed_list=train_ret['feed_list'],
                                           place=place,
                                           mode='lac',
                                           reader=dataset,
                                           iterable=True)

    test_reader = creator.create_pyreader(args,
                                          file_name=args.test_data,
                                          feed_list=train_ret['feed_list'],
                                          place=place,
                                          mode='lac',
                                          reader=dataset,
                                          iterable=True,
                                          for_test=True)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    if args.init_checkpoint:
        utils.init_checkpoint(exe, args.init_checkpoint, train_program)
    if dev_count > 1:
        device = "GPU" if args.use_cuda else "CPU"
        print("%d %s are used to train model" % (dev_count, device))
        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        build_strategy = fluid.compiler.BuildStrategy()

        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    # start training
    num_train_examples = dataset.get_num_examples(args.train_data)
    max_train_steps = args.epoch * num_train_examples // args.batch_size
    print("Num train examples: %d" % num_train_examples)
    print("Max train steps: %d" % max_train_steps)

    ce_info = []
    step = 0
    print_start_time = time.time()
    for epoch_id in range(args.epoch):
        ce_time = 0
        for data in train_reader():
            # Fetch metrics only on the steps that print them, so the other
            # steps run without any fetch op.
            if step % args.print_steps == 0:
                fetch_list = [
                    train_ret["avg_cost"], train_ret["precision"],
                    train_ret["recall"], train_ret["f1_score"]
                ]
            else:
                fetch_list = []

            outputs = exe.run(
                compiled_prog,
                fetch_list=fetch_list,
                feed=data[0],
            )

            if step % args.print_steps == 0:
                print_end_time = time.time()
                avg_cost, precision, recall, f1_score = [
                    np.mean(x) for x in outputs
                ]

                print(
                    "[train] step = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f"
                    % (step, avg_cost, precision, recall, f1_score,
                       print_end_time - print_start_time))
                print_start_time = time.time()

            if step % args.validation_steps == 0:
                # Time the validation pass start -> end. The timestamps used
                # to be taken in the opposite order, which accumulated a
                # negative duration into ce_time.
                valid_start_time = time.time()
                test_f1 = test_process(exe, test_program, test_reader,
                                       train_ret)
                valid_end_time = time.time()

                if test_f1 > best_score:
                    best_score = test_f1
                    # NOTE(review): "best_mode" looks like a typo for
                    # "best_model"; kept because other tooling may rely on
                    # this path.
                    save_path = os.path.join(args.model_save_dir, "best_mode")
                    fluid.io.save_persistables(exe, save_path, train_program)

                ce_time += valid_end_time - valid_start_time
                # The metric values are those of the most recent printed step
                # (step 0 always prints, so they are defined here).
                ce_info.append(
                    [ce_time, avg_cost, precision, recall, f1_score])

            # save checkpoints
            if step % args.save_steps == 0 and step != 0:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(step))
                fluid.io.save_persistables(exe, save_path, train_program)
            step += 1

    if args.enable_ce:
        card_num = get_cards()
        ce_cost = 0
        ce_f1 = 0
        ce_p = 0
        ce_r = 0
        ce_time = 0
        try:
            # report the second-to-last validation record
            ce_time, ce_cost, ce_p, ce_r, ce_f1 = ce_info[-2]
        except IndexError:
            # fewer than two validation records were collected
            print("ce info error")
        print("kpis\teach_step_duration_card%s\t%s" % (card_num, ce_time))
        print("kpis\ttrain_cost_card%s\t%f" % (card_num, ce_cost))
        print("kpis\ttrain_precision_card%s\t%f" % (card_num, ce_p))
        print("kpis\ttrain_recall_card%s\t%f" % (card_num, ce_r))
        print("kpis\ttrain_f1_card%s\t%f" % (card_num, ce_f1))
Beispiel #15
0
def main(args):
    """
    Main Function

    Builds the training / validation / inference programs selected by
    ``args.do_train``, ``args.do_val`` and ``args.do_infer``, runs the shared
    startup program, optionally restores a checkpoint and then executes the
    selected phases.

    Args:
        args: parsed options. Fields read here: ``use_cuda``, ``task_name``,
            ``data_dir``, ``vocab_path``, ``random_seed``, ``num_labels``,
            ``do_train``, ``do_val``, ``do_infer``, ``batch_size``, ``epoch``,
            ``lr``, ``verbose``, ``init_checkpoint``, ``skip_steps``,
            ``save_steps``, ``validation_steps``, ``save_checkpoint_dir`` and
            ``enable_ce``.

    Raises:
        ValueError: if none of ``do_train`` / ``do_val`` / ``do_infer`` is
            set, or if no training is requested and ``init_checkpoint`` is
            empty.
    """
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    task_name = args.task_name.lower()
    processor = reader.EmoTectProcessor(data_dir=args.data_dir,
                                        vocab_path=args.vocab_path,
                                        random_seed=args.random_seed)
    # The label count is taken from the command line, not from the processor.
    num_labels = args.num_labels

    if not (args.do_train or args.do_val or args.do_infer):
        raise ValueError("For args `do_train`, `do_val` and `do_infer`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = processor.data_generator(
            batch_size=args.batch_size, phase='train', epoch=args.epoch)

        num_train_examples = processor.get_num_examples(phase="train")
        max_train_steps = args.epoch * num_train_examples // args.batch_size + 1

        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        train_program = fluid.Program()
        if args.random_seed is not None:
            train_program.random_seed = args.random_seed

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_loader, loss, accuracy, num_seqs = create_model(
                    args, num_labels=num_labels, is_prediction=False)

                sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
                sgd_optimizer.minimize(loss)

        if args.verbose:
            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val:
        # While training, validate on the dev split; otherwise use the test
        # split for the final report.
        if args.do_train:
            test_data_generator = processor.data_generator(
                batch_size=args.batch_size, phase='dev', epoch=1)
        else:
            test_data_generator = processor.data_generator(
                batch_size=args.batch_size, phase='test', epoch=1)

        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_loader, loss, accuracy, num_seqs = create_model(
                    args, num_labels=num_labels, is_prediction=False)
        test_prog = test_prog.clone(for_test=True)

    if args.do_infer:
        infer_data_generator = processor.data_generator(
            batch_size=args.batch_size, phase='infer', epoch=1)

        # The inference graph gets its own program object.  Reusing the
        # ``test_prog`` name here would replace the validation program, so
        # that ``evaluate`` would run the prediction graph with the
        # validation loader whenever do_val and do_infer are both set.
        infer_prog = fluid.Program()
        with fluid.program_guard(infer_prog, startup_prog):
            with fluid.unique_name.guard():
                infer_loader, probs, _ = create_model(args,
                                                      num_labels=num_labels,
                                                      is_prediction=True)
        infer_prog = infer_prog.clone(for_test=True)

    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint:
            utils.init_checkpoint(exe,
                                  args.init_checkpoint,
                                  main_program=startup_prog)
    elif args.do_val or args.do_infer:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or infer!")
        if args.do_val:
            utils.init_checkpoint(exe,
                                  args.init_checkpoint,
                                  main_program=test_prog)
        if args.do_infer:
            utils.init_checkpoint(exe,
                                  args.init_checkpoint,
                                  main_program=infer_prog)

    # Every phase runs on the same executor; the aliases only name the role.
    if args.do_train:
        train_exe = exe
        train_loader.set_sample_list_generator(train_data_generator)
    else:
        train_exe = None
    if args.do_val:
        test_exe = exe
        test_loader.set_sample_list_generator(test_data_generator)
    if args.do_infer:
        test_exe = exe
        infer_loader.set_sample_list_generator(infer_data_generator)

    if args.do_train:
        train_loader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        time_begin = time.time()
        ce_info = []
        while True:
            try:
                steps += 1
                # Metrics are fetched only on every ``skip_steps``-th step.
                if steps % args.skip_steps == 0:
                    fetch_list = [loss.name, accuracy.name, num_seqs.name]
                else:
                    fetch_list = []

                outputs = train_exe.run(program=train_program,
                                        fetch_list=fetch_list,
                                        return_numpy=False)
                if steps % args.skip_steps == 0:
                    np_loss, np_acc, np_num_seqs = outputs
                    np_loss = np.array(np_loss)
                    np_acc = np.array(np_acc)
                    np_num_seqs = np.array(np_num_seqs)
                    total_cost.extend(np_loss * np_num_seqs)
                    total_acc.extend(np_acc * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)

                    if args.verbose:
                        verbose = "train loader queue size: %d, " % train_loader.queue.size(
                        )
                        print(verbose)

                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("step: %d, avg loss: %f, "
                          "avg acc: %f, speed: %f steps/s" %
                          (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                           np.sum(total_acc) / np.sum(total_num_seqs),
                           args.skip_steps / used_time))
                    ce_info.append([
                        np.sum(total_cost) / np.sum(total_num_seqs),
                        np.sum(total_acc) / np.sum(total_num_seqs), used_time
                    ])
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.save_checkpoint_dir,
                                             "step_" + str(steps))
                    fluid.save(train_program, save_path)

                if steps % args.validation_steps == 0:
                    # evaluate on dev set
                    if args.do_val:
                        evaluate(test_exe, test_prog, test_loader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "dev")

            except fluid.core.EOFException:
                # The loader is exhausted: evaluate once more, save and stop.
                print("final step: %d " % steps)
                if args.do_val:
                    evaluate(test_exe, test_prog, test_loader,
                             [loss.name, accuracy.name, num_seqs.name], "dev")

                save_path = os.path.join(args.save_checkpoint_dir,
                                         "step_" + str(steps))
                fluid.save(train_program, save_path)
                train_loader.reset()
                break

    if args.do_train and args.enable_ce:
        card_num = get_cards()
        ce_loss = 0
        ce_acc = 0
        ce_time = 0
        try:
            # Report the second-to-last logged entry.
            ce_loss = ce_info[-2][0]
            ce_acc = ce_info[-2][1]
            ce_time = ce_info[-2][2]
        except IndexError:
            # Fewer than two entries were logged; keep the zero defaults.
            print("ce info error")
        print("kpis\teach_step_duration_%s_card%s\t%s" %
              (task_name, card_num, ce_time))
        print("kpis\ttrain_loss_%s_card%s\t%f" %
              (task_name, card_num, ce_loss))
        print("kpis\ttrain_acc_%s_card%s\t%f" % (task_name, card_num, ce_acc))

    # evaluate on test set
    if not args.do_train and args.do_val:
        print("Final test result:")
        evaluate(test_exe, test_prog, test_loader,
                 [loss.name, accuracy.name, num_seqs.name], "test")

    # infer
    if args.do_infer:
        print("Final infer result:")
        infer(test_exe, infer_prog, infer_loader, [probs.name], "infer")
Beispiel #16
0
def main(args):
    """
    Build and run the train / test / infer programs selected by ``args``.

    Each requested phase (``args.do_train``, ``args.do_test``,
    ``args.do_infer``) gets its own ``fluid.Program`` built against one shared
    startup program.  After the startup program has run and checkpoints have
    been loaded, the phases are executed in the order train, test, infer.

    Args:
        args: parsed options. Fields read here: ``random_seed``, ``do_train``,
            ``do_test``, ``do_infer``, ``train_data``, ``test_data``,
            ``infer_data``, ``traindata_shuffle_buffer``, ``batch_size``,
            ``base_learning_rate``, ``use_cuda``, ``init_checkpoint``,
            ``epoch``, ``save_model_per_batches``, ``valid_model_per_batches``,
            ``model_save_dir`` and ``enable_ce``.

    Raises:
        ValueError: if testing is requested without training and
            ``init_checkpoint`` is empty.
    """

    startup_program = fluid.Program()
    if args.random_seed is not None:
        startup_program.random_seed = args.random_seed

    # prepare dataset
    dataset = reader.Dataset(args)

    if args.do_train:
        train_program = fluid.Program()
        if args.random_seed is not None:
            train_program.random_seed = args.random_seed
        with fluid.program_guard(train_program, startup_program):
            with fluid.unique_name.guard():
                train_ret = create_model(args, "train_reader",
                                         dataset.vocab_size,
                                         dataset.num_labels)
                # Training data is shuffled before batching.
                train_ret["pyreader"].decorate_paddle_reader(
                    paddle.batch(paddle.reader.shuffle(
                        dataset.file_reader(args.train_data),
                        buf_size=args.traindata_shuffle_buffer),
                                 batch_size=args.batch_size))

                optimizer = fluid.optimizer.Adam(
                    learning_rate=args.base_learning_rate)
                optimizer.minimize(train_ret["avg_cost"])

    if args.do_test:
        test_program = fluid.Program()
        with fluid.program_guard(test_program, startup_program):
            with fluid.unique_name.guard():
                test_ret = create_model(args, "test_reader",
                                        dataset.vocab_size, dataset.num_labels)
                test_ret["pyreader"].decorate_paddle_reader(
                    paddle.batch(dataset.file_reader(args.test_data),
                                 batch_size=args.batch_size))
        test_program = test_program.clone(
            for_test=True)  # to share parameters with train model

    if args.do_infer:
        infer_program = fluid.Program()
        with fluid.program_guard(infer_program, startup_program):
            with fluid.unique_name.guard():
                infer_ret = create_model(args, "infer_reader",
                                         dataset.vocab_size,
                                         dataset.num_labels)
                infer_ret["pyreader"].decorate_paddle_reader(
                    paddle.batch(dataset.file_reader(args.infer_data),
                                 batch_size=args.batch_size))
        infer_program = infer_program.clone(for_test=True)

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)

    # load checkpoints
    if args.do_train:
        if args.init_checkpoint:
            utils.init_checkpoint(exe, args.init_checkpoint, train_program)
    elif args.do_test:
        if not args.init_checkpoint:
            raise ValueError(
                "args 'init_checkpoint' should be set if only doing validation or testing!"
            )
        utils.init_checkpoint(exe, args.init_checkpoint, test_program)
    if args.do_infer:
        utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    # do start to train
    if args.do_train:
        num_train_examples = dataset.get_num_examples(args.train_data)
        max_train_steps = args.epoch * num_train_examples // args.batch_size
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        ce_info = []
        batch_id = 0  # counts batches across all epochs
        for epoch_id in range(args.epoch):
            train_ret["pyreader"].start()
            ce_time = 0  # accumulated run time, restarted every epoch
            try:
                while True:
                    start_time = time.time()
                    avg_cost, nums_infer, nums_label, nums_correct = exe.run(
                        train_program,
                        fetch_list=[
                            train_ret["avg_cost"],
                            train_ret["num_infer_chunks"],
                            train_ret["num_label_chunks"],
                            train_ret["num_correct_chunks"],
                        ],
                    )
                    end_time = time.time()
                    # The evaluator is reset first, so the reported metrics
                    # describe this batch only.
                    train_ret["chunk_evaluator"].reset()
                    train_ret["chunk_evaluator"].update(
                        nums_infer, nums_label, nums_correct)
                    precision, recall, f1_score = train_ret[
                        "chunk_evaluator"].eval()
                    batch_id += 1
                    print(
                        "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f "
                        % (batch_id, avg_cost, precision, recall, f1_score,
                           end_time - start_time))
                    ce_time += end_time - start_time
                    ce_info.append(
                        [ce_time, avg_cost, precision, recall, f1_score])

                    # save checkpoints
                    if (batch_id % args.save_model_per_batches == 0):
                        save_path = os.path.join(args.model_save_dir,
                                                 "step_" + str(batch_id))
                        fluid.io.save_persistables(exe, save_path,
                                                   train_program)

                    # evaluate
                    if (batch_id % args.valid_model_per_batches
                            == 0) and args.do_test:
                        evaluate(exe, test_program, test_ret)

            except fluid.core.EOFException:
                # End of this epoch's data: save, then reset the reader so
                # the next epoch can start it again.
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(batch_id))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_ret["pyreader"].reset()

    if args.do_train and args.enable_ce:
        card_num = get_cards()
        ce_cost = 0
        ce_f1 = 0
        ce_p = 0
        ce_r = 0
        ce_time = 0
        try:
            # Report the second-to-last recorded batch.
            ce_time = ce_info[-2][0]
            ce_cost = ce_info[-2][1]
            ce_p = ce_info[-2][2]
            ce_r = ce_info[-2][3]
            ce_f1 = ce_info[-2][4]
        except IndexError:
            # Fewer than two batches were recorded; keep the zero defaults.
            print("ce info error")
        print("kpis\teach_step_duration_card%s\t%s" % (card_num, ce_time))
        print("kpis\ttrain_cost_card%s\t%f" % (card_num, ce_cost))
        print("kpis\ttrain_precision_card%s\t%f" % (card_num, ce_p))
        print("kpis\ttrain_recall_card%s\t%f" % (card_num, ce_r))
        print("kpis\ttrain_f1_card%s\t%f" % (card_num, ce_f1))

    # final evaluation; runs whenever do_test is set, also after training
    if args.do_test:
        evaluate(exe, test_program, test_ret)

    if args.do_infer:
        infer_ret["pyreader"].start()
        while True:
            try:
                (
                    words,
                    crf_decode,
                ) = exe.run(infer_program,
                            fetch_list=[
                                infer_ret["words"],
                                infer_ret["crf_decode"],
                            ],
                            return_numpy=False)
                results = utils.parse_result(words, crf_decode, dataset)
                for result in results:
                    print(result)
            except fluid.core.EOFException:
                infer_ret["pyreader"].reset()
                break
Beispiel #17
0
def main():
    """
    Command-line entry point for the Number Field Sieve driver.

    Parses the command line, loads the parameter file, prepares the work
    directory, logging and checkpoint, decides which stages have to run, then
    runs polynomial selection, sieving, linear algebra and square root in
    order and logs the timings and factors.  If a ``post_factor`` command is
    configured it is run at the end.
    """

    signal.signal(signal.SIGINT, signal_handler)

    parser = argparse.ArgumentParser(description="Integer Factorization with "
                                     "the Number Field Sieve")
    parser.add_argument("parameters", help="A file with the parameters to use")
    parser.add_argument("options",
                        metavar="OPTION",
                        help="An option as in "
                        "parameter file (format: key=value)",
                        nargs="*")
    parser.add_argument('--resume',
                        '-r',
                        help="checkpoint file to resume from")
    parser.add_argument(
        '--stage',
        '-s',
        action='append',
        help=
        "stage to complete ('start','polysel','sieving','linalg','complete'), add + to run all subsequent stages"
    )

    args = parser.parse_args()
    parameters = utils.get_params(args.parameters, args.options)

    name = parameters.myparams({"name": str}, ['tasks'])["name"]
    workdir = parameters.myparams({"workdir": str}, ['tasks'])["workdir"]

    if not os.path.exists(workdir):
        logger.info("Creating work directory %s", workdir)
        os.makedirs(workdir)

    setup_logging(workdir, name)

    # Load or create initial checkpoint
    checkpoint_file = args.resume
    if not checkpoint_file:
        checkpoint_file = os.path.join(workdir, "checkpoint.dat")
    utils.init_checkpoint(checkpoint_file)

    # set parameters that are unlikely to change from run to run, such as filenames and directories
    parameters = set_static_parameters(parameters)

    # check that all required parameters are present
    params = check_parameters(parameters)
    utils.update_checkpoint({'params': params})

    # set parameters that will likely change from run to run
    parameters = set_dynamic_parameters(parameters)

    # Write a snapshot of the parameters to a file
    snapshot_filename = "%s/%s.parameters_snapshot" % (workdir, name)
    with open(snapshot_filename, "w") as snapshot_file:
        logger.debug("Writing parameter snapshot to %s", snapshot_filename)
        snapshot_file.write(str(parameters))
        snapshot_file.write("\n")

    # For each checkpointed stage, check if the stage should be run again.
    # A stage should be run again under the following circumstances:
    #   - The user manually requested to run the stage
    #   - No checkpoint exists for the stage
    #   - A stage on which this stage depends will be re-run
    #   - Parameters on which the stage depends have been changed since the last run
    if args.stage:
        for stage in args.stage:
            if stage.endswith('+'):
                # "name+" selects the named stage and every stage after it.
                stage = stage[:-1]
                if stage not in stages:
                    continue
                stage_required.manual_stages = list(
                    range(stages.index(stage), len(stages)))
                break
            if stage not in stages:
                # Unknown names are skipped.  Calling list.pop() with the
                # string would raise TypeError, and removing items from the
                # list being iterated would skip entries.
                logger.warning("Ignoring unknown stage %s", stage)
                continue
            stage_required.manual_stages.append(stages.index(stage))
    else:
        # since no stage were specified to run manually, choose the first stage based on the checkpoint file
        stage_required.manual_stages = set_manual_stages(params)

    # Run polynomial selection
    polysel_result = do_polysel(parameters)

    # Run sieving
    sieve_result = do_sieve(parameters, polysel_result)

    # Run linalg
    linalg_result = do_linalg(parameters, sieve_result)

    # Run square root
    sqrt_result = do_sqrt(parameters, linalg_result)

    factoring_duration = polysel_result['duration'] + sieve_result[
        'duration'] + linalg_result['duration'] + sqrt_result['duration']
    logger.info('Factoring completed in %s',
                utils.str_time(factoring_duration))
    logger.info('\tPolysel in real/cpu %s/%s',
                utils.str_time(polysel_result['duration']),
                utils.str_time(polysel_result['cputime']))
    logger.info("\tSieving in real/cpu %s/%s",
                utils.str_time(sieve_result['duration']),
                utils.str_time(sieve_result['cputime']))
    logger.info("\tLinalg in %s", utils.str_time(linalg_result['duration']))
    logger.info("\tSqrt in %s", utils.str_time(sqrt_result['duration']))
    logger.info("\tFactors %s", ','.join(sqrt_result['factors']))

    # Optional user command to run once factoring has finished.
    post_factor = parameters.myparams({
        'post_factor': None
    }, ['commands']).get('post_factor')
    if post_factor is not None:
        logger.info('Post-factor command %s', post_factor)
        utils.run_command(post_factor, logger=logger)
Beispiel #18
0
def main():
    """
    Command-line entry point for the Number Field Sieve driver.

    Parses the command line, loads the parameter file, prepares the work
    directory, logging and checkpoint, decides which stages have to run, then
    runs polynomial selection, sieving, linear algebra and square root in
    order and logs the timings and factors.  If a ``post_factor`` command is
    configured it is run at the end.
    """

    signal.signal(signal.SIGINT, signal_handler)

    parser = argparse.ArgumentParser(description="Integer Factorization with "
                                         "the Number Field Sieve")
    parser.add_argument("parameters", help="A file with the parameters to use")
    parser.add_argument("options", metavar="OPTION", help="An option as in "
                            "parameter file (format: key=value)", nargs="*")
    parser.add_argument('--resume','-r', help="checkpoint file to resume from")
    parser.add_argument('--stage','-s', action='append', help="stage to complete ('start','polysel','sieving','linalg','complete'), add + to run all subsequent stages")

    args = parser.parse_args()
    parameters = utils.get_params(args.parameters, args.options)

    name = parameters.myparams({"name": str}, ['tasks'])["name"]
    workdir = parameters.myparams({"workdir": str}, ['tasks'])["workdir"]

    if not os.path.exists(workdir):
        logger.info("Creating work directory %s", workdir)
        os.makedirs(workdir)

    setup_logging(workdir, name)

    # Load or create initial checkpoint
    checkpoint_file = args.resume
    if not checkpoint_file:
        checkpoint_file = os.path.join(workdir, "checkpoint.dat")
    utils.init_checkpoint(checkpoint_file)

    # set parameters that are unlikely to change from run to run, such as filenames and directories
    parameters = set_static_parameters(parameters)

    # check that all required parameters are present
    params = check_parameters(parameters)
    utils.update_checkpoint({'params': params})

    # set parameters that will likely change from run to run
    parameters = set_dynamic_parameters(parameters)

    # Write a snapshot of the parameters to a file
    snapshot_filename = "%s/%s.parameters_snapshot" % (workdir, name)
    with open(snapshot_filename, "w") as snapshot_file:
        logger.debug("Writing parameter snapshot to %s", snapshot_filename)
        snapshot_file.write(str(parameters))
        snapshot_file.write("\n")

    # For each checkpointed stage, check if the stage should be run again.
    # A stage should be run again under the following circumstances:
    #   - The user manually requested to run the stage
    #   - No checkpoint exists for the stage
    #   - A stage on which this stage depends will be re-run
    #   - Parameters on which the stage depends have been changed since the last run
    if args.stage:
        for stage in args.stage:
            if stage.endswith('+'):
                # "name+" selects the named stage and every stage after it.
                stage = stage[:-1]
                if stage not in stages:
                    continue
                stage_required.manual_stages = list(range(stages.index(stage), len(stages)))
                break
            if stage not in stages:
                # Unknown names are skipped.  Calling list.pop() with the
                # string would raise TypeError, and removing items from the
                # list being iterated would skip entries.
                logger.warning("Ignoring unknown stage %s", stage)
                continue
            stage_required.manual_stages.append(stages.index(stage))
    else:
        # since no stage were specified to run manually, choose the first stage based on the checkpoint file
        stage_required.manual_stages = set_manual_stages(params)

    # Run polynomial selection
    polysel_result = do_polysel(parameters)

    # Run sieving
    sieve_result = do_sieve(parameters, polysel_result)

    # Run linalg
    linalg_result = do_linalg(parameters, sieve_result)

    # Run square root
    sqrt_result = do_sqrt(parameters, linalg_result)

    factoring_duration = polysel_result['duration'] + sieve_result['duration'] + linalg_result['duration'] + sqrt_result['duration']
    logger.info('Factoring completed in %s', utils.str_time(factoring_duration))
    logger.info('\tPolysel in real/cpu %s/%s', utils.str_time(polysel_result['duration']), utils.str_time(polysel_result['cputime']))
    logger.info("\tSieving in real/cpu %s/%s", utils.str_time(sieve_result['duration']), utils.str_time(sieve_result['cputime']))
    logger.info("\tLinalg in %s", utils.str_time(linalg_result['duration']))
    logger.info("\tSqrt in %s", utils.str_time(sqrt_result['duration']))
    logger.info("\tFactors %s", ','.join(sqrt_result['factors']))

    # Optional user command to run once factoring has finished.
    post_factor = parameters.myparams({'post_factor': None}, ['commands']).get('post_factor')
    if post_factor is not None:
        logger.info('Post-factor command %s', post_factor)
        utils.run_command(post_factor, logger=logger)
Beispiel #19
0
def main(args):
    """
    Main Function

    Builds the training / validation / inference programs selected by
    ``args.do_train``, ``args.do_val`` and ``args.do_infer``, runs the shared
    startup program, optionally restores a checkpoint and then executes the
    selected phases.

    Args:
        args: parsed options. Fields read here: ``use_cuda``, ``task_name``,
            ``data_dir``, ``vocab_path``, ``random_seed``, ``max_seq_len``,
            ``do_train``, ``do_val``, ``do_infer``, ``batch_size``, ``epoch``,
            ``enable_ce``, ``lr``, ``verbose``, ``init_checkpoint``,
            ``skip_steps``, ``save_steps``, ``validation_steps`` and
            ``checkpoints``.

    Raises:
        ValueError: if none of ``do_train`` / ``do_val`` / ``do_infer`` is
            set, or if no training is requested and ``init_checkpoint`` is
            empty.
    """
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = 1
    exe = fluid.Executor(place)

    task_name = args.task_name.lower()
    processor = reader.SentaProcessor(data_dir=args.data_dir,
                                      vocab_path=args.vocab_path,
                                      random_seed=args.random_seed,
                                      max_seq_len=args.max_seq_len)
    num_labels = len(processor.get_labels())

    if not (args.do_train or args.do_val or args.do_infer):
        raise ValueError("For args `do_train`, `do_val` and `do_infer`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    # Floor division keeps the per-device batch size an integer; "/" would
    # produce a float on Python 3.
    device_batch_size = args.batch_size // dev_count

    if args.do_train:
        train_data_generator = processor.data_generator(
            batch_size=device_batch_size,
            phase='train',
            epoch=args.epoch,
            shuffle=True)

        num_train_examples = processor.get_num_examples(phase="train")

        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        train_program = fluid.Program()
        if args.enable_ce and args.random_seed is not None:
            train_program.random_seed = args.random_seed

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_reader, loss, accuracy, num_seqs = create_model(
                    args,
                    pyreader_name='train_reader',
                    num_labels=num_labels,
                    is_prediction=False)

                sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
                sgd_optimizer.minimize(loss)

        if args.verbose:
            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val:
        test_data_generator = processor.data_generator(
            batch_size=device_batch_size,
            phase='dev',
            epoch=1,
            shuffle=False)
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_reader, loss, accuracy, num_seqs = create_model(
                    args,
                    pyreader_name='test_reader',
                    num_labels=num_labels,
                    is_prediction=False)

        test_prog = test_prog.clone(for_test=True)

    if args.do_infer:
        infer_data_generator = processor.data_generator(
            batch_size=device_batch_size,
            phase='infer',
            epoch=1,
            shuffle=False)
        infer_prog = fluid.Program()
        with fluid.program_guard(infer_prog, startup_prog):
            with fluid.unique_name.guard():
                infer_reader, prop, _ = create_model(
                    args,
                    pyreader_name='infer_reader',
                    num_labels=num_labels,
                    is_prediction=True)
        infer_prog = infer_prog.clone(for_test=True)

    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog)

    elif args.do_val or args.do_infer:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or testing!")
        init_checkpoint(exe, args.init_checkpoint, main_program=startup_prog)

    # Every phase runs on the same executor; the aliases only name the role.
    if args.do_train:
        train_exe = exe
        train_reader.set_sample_list_generator(train_data_generator)
    else:
        train_exe = None
    if args.do_val:
        test_exe = exe
        test_reader.set_sample_list_generator(test_data_generator)
    if args.do_infer:
        test_exe = exe
        infer_reader.set_sample_list_generator(infer_data_generator)

    if args.do_train:
        train_reader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        time_begin = time.time()
        while True:
            try:
                steps += 1
                # Metrics are fetched only on every ``skip_steps``-th step.
                if steps % args.skip_steps == 0:
                    fetch_list = [loss.name, accuracy.name, num_seqs.name]
                else:
                    fetch_list = []

                outputs = train_exe.run(program=train_program,
                                        fetch_list=fetch_list,
                                        return_numpy=False)
                if steps % args.skip_steps == 0:
                    np_loss, np_acc, np_num_seqs = outputs
                    np_loss = np.array(np_loss)
                    np_acc = np.array(np_acc)
                    np_num_seqs = np.array(np_num_seqs)
                    total_cost.extend(np_loss * np_num_seqs)
                    total_acc.extend(np_acc * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)

                    if args.verbose:
                        # The loader created above is bound to
                        # ``train_reader``; no ``train_pyreader`` exists.
                        verbose = "train pyreader queue size: %d, " % train_reader.queue.size(
                        )
                        print(verbose)

                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("step: %d, ave loss: %f, "
                          "ave acc: %f, speed: %f steps/s" %
                          (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                           np.sum(total_acc) / np.sum(total_num_seqs),
                           args.skip_steps / used_time))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps),
                                             "checkpoint")
                    fluid.save(train_program, save_path)

                if steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        print("do evalatation")
                        evaluate(exe, test_prog, test_reader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "dev")

            except fluid.core.EOFException:
                # The reader is exhausted: save a last checkpoint and stop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps), "checkpoint")
                fluid.save(train_program, save_path)
                train_reader.reset()
                break

    # final eval on dev set
    if args.do_val:
        print("Final validation result:")
        evaluate(exe, test_prog, test_reader,
                 [loss.name, accuracy.name, num_seqs.name], "dev")

    # final eval on test set
    if args.do_infer:
        print("Final test result:")
        inference(exe, infer_prog, infer_reader, [prop.name], "infer")
Beispiel #20
0
def do_compress(args):
    """Build the training graph and hand it to a ``Compressor`` run.

    The model is created inside the default main/startup programs, the
    startup program is executed, an optional checkpoint is loaded, and a
    ``Compressor`` is configured from ``args.compress_config`` and started.

    Args:
        args: option namespace. This function reads ``random_seed``,
            ``base_learning_rate``, ``use_cuda``, ``cpu_num``,
            ``train_data``, ``test_data``, ``batch_size``,
            ``init_checkpoint`` and ``compress_config`` from it; the whole
            object is also forwarded to ``reader.Dataset`` and
            ``creator.create_model``.
    """
    main_prog = fluid.default_main_program()
    init_prog = fluid.default_startup_program()

    dataset = reader.Dataset(args)
    with fluid.program_guard(main_prog, init_prog):
        # Seed both programs so that parameter init and any random ops agree.
        main_prog.random_seed = args.random_seed
        init_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():
            model = creator.create_model(
                args, dataset.vocab_size, dataset.num_labels, mode='train')

    # Snapshot of the graph used as the evaluation program.
    eval_prog = main_prog.clone()

    adam = fluid.optimizer.Adam(learning_rate=args.base_learning_rate)

    # Pick the execution device.
    if not args.use_cuda:
        # Never ask for more CPU workers than the machine actually has.
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if dev_count < args.cpu_num:
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()
    else:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()

    batched_train = paddle.batch(dataset.file_reader(args.train_data),
                                 batch_size=args.batch_size)
    batched_test = paddle.batch(dataset.file_reader(args.test_data),
                                batch_size=args.batch_size)

    exe = fluid.Executor(place)
    exe.run(init_prog)

    if args.init_checkpoint:
        ckpt_path = args.init_checkpoint + '.pdckpt'
        utils.init_checkpoint(exe, ckpt_path, main_prog)

    # (alias, variable name) pairs consumed by the Compressor.
    train_feeds = [
        ('words', model['words'].name),
        ("targets", model["targets"].name),
    ]
    train_fetches = [('loss', model['avg_cost'].name)]

    eval_feeds = [
        ('words', model['words'].name),
        ("targets", model["targets"].name),
    ]
    eval_fetches = [('f1_score', model['f1_score'].name)]
    print(model['crf_decode'].name)

    compressor = Compressor(
        place,
        fluid.global_scope(),
        train_program=main_prog,
        train_reader=batched_train,
        train_feed_list=train_feeds,
        train_fetch_list=train_fetches,
        eval_program=eval_prog,
        eval_reader=batched_test,
        eval_feed_list=eval_feeds,
        eval_fetch_list=eval_fetches,
        teacher_programs=[],
        train_optimizer=adam,
        distiller_optimizer=None)
    compressor.config(args.compress_config)
    compressor.run()
Beispiel #21
0
    def _build_programs(self):
        """
        Build programs.

        In inference mode (``self.run_infer``) only ``infer_program`` is
        built. Otherwise ``train_program`` and ``eval_program`` are built,
        preceded by the distributed strategy when ``self.is_distributed`` is
        set. Afterwards the startup program is executed and parameters are
        loaded from ``self.init_pretraining_params`` or, failing that,
        ``self.init_checkpoint`` when one of them is a non-empty string.
        Only use in static graph mode.
        """
        if not self.run_infer:
            if self.is_distributed:
                executor_opts = fluid.ExecutionStrategy()
                executor_opts.use_experimental_executor = True
                executor_opts.num_threads = 4
                executor_opts.num_iteration_per_drop_scope = 1

                strategy = DistributedStrategy()
                strategy.exec_strategy = executor_opts
                strategy.nccl_comm_num = 1
                strategy.fuse_all_reduce_ops = True
                if self.use_recompute:
                    strategy.forward_recompute = True
                    strategy.enable_sequential_execution = True
                if self.use_amp:
                    strategy.use_amp = True
                    strategy.amp_loss_scaling = self.amp_loss_scaling
                self.dist_strategy = strategy

            self.startup_program = fluid.Program()
            # Training graph.
            self.train_program = fluid.Program()
            with fluid.program_guard(self.train_program,
                                     self.startup_program), \
                    fluid.unique_name.guard():
                feed = self._get_feed_dict()
                self.feed_dict = feed
                forward_out = self.forward(feed)
                if self.is_distributed and self.use_recompute:
                    self.dist_strategy.recompute_checkpoints = forward_out[
                        "checkpoints"]
                metrics, statistics = self.get_metrics_and_statistics(
                    feed, forward_out)

                # The evaluation graph is cloned here, before the call to
                # self.optimize() below.
                self.eval_program = self.train_program.clone(for_test=True)
                self.eval_fetch_dict = {**metrics, **statistics}

                metrics["scheduled_lr"] = self.optimize(metrics)
                self.train_fetch_dict = metrics

            # self.program keeps the locally built graph; it is the one the
            # parameter loading below receives.
            self.program = self.train_program
            if self.is_distributed:
                self.train_program = fleet.main_program
        else:
            self.startup_program = fluid.Program()
            # Inference graph.
            self.infer_program = fluid.Program()
            with fluid.program_guard(self.infer_program,
                                     self.startup_program), \
                    fluid.unique_name.guard():
                feed = self._get_feed_dict(is_infer=True)
                self.infer_feed_dict = feed
                forward_out = self.forward(feed, is_infer=True)
                self.infer_fetch_dict = self.infer(feed, forward_out)
            self.infer_program = self.infer_program.clone(for_test=True)

            self.program = self.infer_program

        self.exe.run(self.startup_program)
        # Pretraining params take precedence over a checkpoint.
        if self.init_pretraining_params != "":
            init_pretraining_params(self.exe, self.init_pretraining_params,
                                    self.program)
        elif self.init_checkpoint != "":
            init_checkpoint(self.exe, self.init_checkpoint, self.program)
def main(args):
    """
    Main Function

    Builds the train / dev / infer programs selected by ``args.do_train``,
    ``args.do_val`` and ``args.do_infer`` on top of ERNIE embeddings, runs
    the startup program, optionally restores a checkpoint, then runs the
    training loop followed by a final dev evaluation and an inference pass.

    Args:
        args: option namespace. Besides the three ``do_*`` switches this
            function reads ``ernie_config_path``, ``use_cuda``,
            ``vocab_path``, ``label_map_config``, ``max_seq_len``,
            ``do_lower_case``, ``random_seed``, ``train_set``, ``dev_set``,
            ``test_set``, ``batch_size``, ``epoch``, ``lr``, ``verbose``,
            ``use_paddle_hub``, ``init_checkpoint``, ``checkpoints``,
            ``skip_steps``, ``save_steps`` and ``validation_steps``.

    Raises:
        ValueError: if none of ``do_train`` / ``do_val`` / ``do_infer`` is
            set, or if only validation/inference is requested and
            ``init_checkpoint`` is empty.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ClassifyReader(vocab_path=args.vocab_path,
                                        label_map_config=args.label_map_config,
                                        max_seq_len=args.max_seq_len,
                                        do_lower_case=args.do_lower_case,
                                        random_seed=args.random_seed)

    if not (args.do_train or args.do_val or args.do_infer):
        raise ValueError("For args `do_train`, `do_val` and `do_infer`, at "
                         "least one of them must be True.")

    def _encode(ernie_inputs):
        # Single place that picks the PaddleHub-backed encoder or the local
        # ERNIE encoder; shared by the train, dev and infer graphs.
        if args.use_paddle_hub:
            return ernie_encoder_with_paddle_hub(ernie_inputs,
                                                 args.max_seq_len)
        return ernie_encoder(ernie_inputs, ernie_config=ernie_config)

    # One startup program is shared by every graph built below.
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                train_pyreader, ernie_inputs, labels = ernie_pyreader(
                    args, pyreader_name='train_pyreader')

                # get ernie_embeddings
                embeddings = _encode(ernie_inputs)

                # user defined model based on ernie embeddings
                loss, accuracy, num_seqs = create_model(args,
                                                        embeddings,
                                                        labels=labels,
                                                        is_prediction=False)

                optimizer = fluid.optimizer.Adam(learning_rate=args.lr)
                optimizer.minimize(loss)

        if args.verbose:
            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val:
        test_data_generator = reader.data_generator(input_file=args.dev_set,
                                                    batch_size=args.batch_size,
                                                    phase='dev',
                                                    epoch=1,
                                                    shuffle=False)
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                test_pyreader, ernie_inputs, labels = ernie_pyreader(
                    args, pyreader_name='eval_reader')

                # get ernie_embeddings
                embeddings = _encode(ernie_inputs)

                # user defined model based on ernie embeddings
                # NOTE(review): this rebinds loss/accuracy/num_seqs to the
                # dev-graph variables; the training loop below then fetches
                # by these variables' names. Presumably the names match the
                # train graph because each graph is built under a fresh
                # unique_name.guard() -- confirm.
                loss, accuracy, num_seqs = create_model(args,
                                                        embeddings,
                                                        labels=labels,
                                                        is_prediction=False)

        test_prog = test_prog.clone(for_test=True)

    if args.do_infer:
        infer_data_generator = reader.data_generator(
            input_file=args.test_set,
            batch_size=args.batch_size,
            phase='infer',
            epoch=1,
            shuffle=False)
        infer_prog = fluid.Program()
        with fluid.program_guard(infer_prog, startup_prog):
            with fluid.unique_name.guard():
                infer_pyreader, ernie_inputs, labels = ernie_pyreader(
                    args, pyreader_name="infer_pyreader")

                # get ernie_embeddings
                embeddings = _encode(ernie_inputs)

                probs = create_model(args,
                                     embeddings,
                                     labels=labels,
                                     is_prediction=True)

        infer_prog = infer_prog.clone(for_test=True)

    exe.run(startup_prog)

    # Restore parameters. A checkpoint is optional when training but
    # mandatory when only evaluating or predicting.
    if args.do_train:
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=train_program)
    else:
        if not args.init_checkpoint:
            # The trailing space after "if" keeps the two literals from
            # fusing into "ifonly" in the rendered message.
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or testing!")
        # The dev graph is preferred over the infer graph when both exist.
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=test_prog if args.do_val else infer_prog)

    # Attach the batch generators to their readers.
    if args.do_train:
        train_pyreader.set_batch_generator(train_data_generator)
    if args.do_val:
        test_pyreader.set_batch_generator(test_data_generator)
    if args.do_infer:
        infer_pyreader.set_batch_generator(infer_data_generator)

    if args.do_train:
        train_pyreader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        time_begin = time.time()
        while True:
            try:
                steps += 1
                # Metrics are fetched only every `skip_steps` steps.
                log_step = steps % args.skip_steps == 0
                if log_step:
                    fetch_list = [loss.name, accuracy.name, num_seqs.name]
                else:
                    fetch_list = []

                outputs = exe.run(program=train_program,
                                  fetch_list=fetch_list,
                                  return_numpy=False)
                if log_step:
                    np_loss, np_acc, np_num_seqs = outputs
                    np_loss = np.array(np_loss)
                    np_acc = np.array(np_acc)
                    np_num_seqs = np.array(np_num_seqs)
                    # Weight by sequence count so the printed averages are
                    # per-sequence.
                    total_cost.extend(np_loss * np_num_seqs)
                    total_acc.extend(np_acc * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        print(verbose)

                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("step: %d, ave loss: %f, "
                          "ave acc: %f, speed: %f steps/s" %
                          (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                           np.sum(total_acc) / np.sum(total_num_seqs),
                           args.skip_steps / used_time))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps),
                                             "checkpoint")
                    fluid.save(train_program, save_path)

                if steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        evaluate(exe, test_prog, test_pyreader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "dev")

            except fluid.core.EOFException:
                # The reader is exhausted: save a final checkpoint and stop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps), "checkpoint")
                fluid.save(train_program, save_path)
                train_pyreader.reset()
                break

    # final eval on dev set
    if args.do_val:
        print("Final validation result:")
        evaluate(exe, test_prog, test_pyreader,
                 [loss.name, accuracy.name, num_seqs.name], "dev")

    # final eval on test set
    if args.do_infer:
        print("Final test result:")
        infer(exe, infer_prog, infer_pyreader, [probs.name], "infer")
Beispiel #23
0
def train(conf_dict, args):
    """
    Train a matching network, periodically validating and exporting it.

    The network, loss and optimizer classes are imported dynamically from
    the names given in ``conf_dict``. Training runs until the train loader
    raises ``fluid.core.EOFException``; an inference model is exported every
    ``args.save_steps`` steps and once more at the end. When ``args.do_test``
    is set, the test split is scored afterwards.

    Args:
        conf_dict: configuration mapping. Reads ``conf_dict["net"]``,
            ``conf_dict["loss"]``, ``conf_dict["optimizer"]`` and
            ``conf_dict["model_path"]``; ``conf_dict["dict_size"]`` is
            overwritten with the vocabulary size.
        args: option namespace. Reads ``vocab_path``, ``use_cuda``,
            ``enable_ce``, ``task_mode``, ``epoch``, ``do_valid``,
            ``do_test``, ``init_checkpoint``, ``batch_size``,
            ``validation_steps``, ``save_steps``, ``output_dir``,
            ``compute_accuracy``, ``lamda`` and ``task_name``.
    """
    # loading vocabulary
    vocab = utils.load_vocab(args.vocab_path)
    # get vocab size
    conf_dict['dict_size'] = len(vocab)
    # Load network structure dynamically
    net = utils.import_class("../shared_modules/models/matching",
                             conf_dict["net"]["module_name"],
                             conf_dict["net"]["class_name"])(conf_dict)
    # Load loss function dynamically
    loss = utils.import_class("../shared_modules/models/matching/losses",
                              conf_dict["loss"]["module_name"],
                              conf_dict["loss"]["class_name"])(conf_dict)
    # Load Optimization method
    optimizer = utils.import_class(
        "../shared_modules/models/matching/optimizers", "paddle_optimizers",
        conf_dict["optimizer"]["class_name"])(conf_dict)
    # load auc method
    metric = fluid.metrics.Auc(name="auc")
    # Get device
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    startup_prog = fluid.Program()
    train_program = fluid.Program()

    # used for continuous evaluation
    if args.enable_ce:
        SEED = 102
        startup_prog.random_seed = SEED
        train_program.random_seed = SEED

    simnet_process = reader.SimNetProcessor(args, vocab)
    is_pairwise = args.task_mode == "pairwise"

    # Build the training network
    with fluid.program_guard(train_program, startup_prog):
        with fluid.unique_name.guard():
            if is_pairwise:
                train_loader, left, pos_right, neg_right = create_model(args)
                left_feat, pos_score = net.predict(left, pos_right)
                pred = pos_score
                _, neg_score = net.predict(left, neg_right)
                avg_cost = loss.compute(pos_score, neg_score)
            else:
                train_loader, left, right, label = create_model(
                    args, is_pointwise=True)
                left_feat, pred = net.predict(left, right)
                avg_cost = loss.compute(pred, label)
            avg_cost.persistable = True
            optimizer.ops(avg_cost)

    # Get Reader
    get_train_examples = simnet_process.get_reader("train", epoch=args.epoch)

    # The test program is built unconditionally: both inference-model exports
    # and the optional test pass use it even when `do_valid` is off, which
    # previously ended in a NameError. The variables rebound here (left,
    # pos_right/right, left_feat, pos_score, pred) are the ones the export
    # and the evaluation fetch below refer to.
    test_prog = fluid.Program()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            if is_pairwise:
                test_loader, left, pos_right = create_model(
                    args, is_inference=True)
                left_feat, pos_score = net.predict(left, pos_right)
                pred = pos_score
            else:
                test_loader, left, right = create_model(args,
                                                        is_inference=True)
                left_feat, pred = net.predict(left, right)
    test_prog = test_prog.clone(for_test=True)

    # Run the startup program BEFORE restoring a checkpoint; running it
    # afterwards re-initialises the parameters and discards what was loaded.
    exe.run(startup_prog)
    # Truthiness instead of `is not ""`: identity comparison with a literal
    # is unreliable, and None is now treated as "no checkpoint".
    if args.init_checkpoint:
        utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)

    def valid_and_test(test_program, test_loader, get_valid_examples, process,
                       mode, exe, fetch_list):
        """
        return auc and acc

        Runs ``test_program`` over every batch from ``get_valid_examples``
        and scores the stacked predictions against the labels that
        ``process`` provides for ``mode`` ("valid" or "test"). Returns
        ``(auc, acc)`` when ``args.compute_accuracy`` is set, else ``auc``.
        """
        # Get Batch Data
        batch_data = fluid.io.batch(get_valid_examples,
                                    args.batch_size,
                                    drop_last=False)
        test_loader.set_sample_list_generator(batch_data)
        test_loader.start()
        pred_list = []
        while True:
            try:
                _pred = exe.run(program=test_program, fetch_list=fetch_list)
                pred_list += list(_pred)
            except fluid.core.EOFException:
                test_loader.reset()
                break
        pred_list = np.vstack(pred_list)
        if mode == "test":
            label_list = process.get_test_label()
        elif mode == "valid":
            label_list = process.get_valid_label()
        else:
            raise ValueError("unsupported mode: %s" % mode)
        if is_pairwise:
            # Rescale the score with (x + 1) / 2 and expand it into two
            # columns (1 - p, p) before it is handed to the metric.
            pred_list = (pred_list + 1) / 2
            pred_list = np.hstack(
                (np.ones_like(pred_list) - pred_list, pred_list))
        metric.reset()
        metric.update(pred_list, label_list)
        auc = metric.eval()
        if args.compute_accuracy:
            acc = utils.get_accuracy(pred_list, label_list, args.task_mode,
                                     args.lamda)
            return auc, acc
        return auc

    def _save_inference_model(step):
        # Export the test program under <output_dir>/<model_path>/<step>.
        model_save_dir = os.path.join(args.output_dir,
                                      conf_dict["model_path"])
        model_path = os.path.join(model_save_dir, str(step))
        if not os.path.exists(model_save_dir):
            os.makedirs(model_save_dir)
        if is_pairwise:
            feed_var_names = [left.name, pos_right.name]
            target_vars = [left_feat, pos_score]
        else:
            feed_var_names = [left.name, right.name]
            target_vars = [left_feat, pred]
        fluid.io.save_inference_model(model_path, feed_var_names,
                                      target_vars, exe, test_prog)
        logging.info("saving infer model in %s" % model_path)

    # run train
    logging.info("start train process ...")
    # set global step
    global_step = 0
    # Continuous evaluation keeps the sample order fixed; otherwise shuffle.
    if args.enable_ce:
        train_batch_data = fluid.io.batch(get_train_examples,
                                          args.batch_size,
                                          drop_last=False)
    else:
        train_batch_data = fluid.io.batch(fluid.io.shuffle(get_train_examples,
                                                           buf_size=10000),
                                          args.batch_size,
                                          drop_last=False)
    train_loader.set_sample_list_generator(train_batch_data)
    train_loader.start()
    losses = []
    start_time = time.time()
    while True:
        try:
            global_step += 1
            avg_loss = exe.run(program=train_program,
                               fetch_list=[avg_cost.name])
            losses.append(np.mean(avg_loss[0]))
            if args.do_valid and global_step % args.validation_steps == 0:
                get_valid_examples = simnet_process.get_reader("valid")
                valid_result = valid_and_test(test_prog, test_loader,
                                              get_valid_examples,
                                              simnet_process, "valid", exe,
                                              [pred.name])
                if args.compute_accuracy:
                    valid_auc, valid_acc = valid_result
                    logging.info(
                        "global_steps: %d, valid_auc: %f, valid_acc: %f, valid_loss: %f"
                        % (global_step, valid_auc, valid_acc, np.mean(losses)))
                else:
                    valid_auc = valid_result
                    logging.info(
                        "global_steps: %d, valid_auc: %f, valid_loss: %f" %
                        (global_step, valid_auc, np.mean(losses)))
            if global_step % args.save_steps == 0:
                _save_inference_model(global_step)

        except fluid.core.EOFException:
            train_loader.reset()
            break
    end_time = time.time()
    ce_loss = np.mean(losses)
    ce_time = end_time - start_time
    # final save
    logging.info("the final step is %s" % global_step)
    _save_inference_model(global_step)
    # used for continuous evaluation
    if args.enable_ce:
        card_num = get_cards()
        print("kpis\teach_step_duration_%s_card%s\t%s" %
              (args.task_name, card_num, ce_time))
        print("kpis\ttrain_loss_%s_card%s\t%f" %
              (args.task_name, card_num, ce_loss))

    if args.do_test:
        # Get Feeder and Reader
        get_test_examples = simnet_process.get_reader("test")
        test_result = valid_and_test(test_prog, test_loader, get_test_examples,
                                     simnet_process, "test", exe, [pred.name])
        if args.compute_accuracy:
            test_auc, test_acc = test_result
            logging.info("AUC of test is %f, Accuracy of test is %f" %
                         (test_auc, test_acc))
        else:
            test_auc = test_result
            logging.info("AUC of test is %f" % test_auc)
def main(args):
    """
    Main Function
    """
    args = parser.parse_args()
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.SequenceLabelReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        random_seed=args.random_seed)

    if not (args.do_train or args.do_test or args.do_infer):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        num_train_examples = reader.get_num_examples(args.train_set)
        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                train_pyreader, ernie_inputs, words, labels = ernie_pyreader(
                    args, pyreader_name='train_reader')
                train_pyreader.decorate_tensor_provider(
                    reader.data_generator(args.train_set,
                                          args.batch_size,
                                          args.epoch,
                                          shuffle=True,
                                          phase="train"))
                # get ernie_embeddings
                embeddings = ernie_encoder(ernie_inputs,
                                           ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                train_ret = create_model(args,
                                         embeddings,
                                         labels=labels,
                                         is_prediction=False)

                optimizer = fluid.optimizer.Adam(learning_rate=args.lr)
                fluid.clip.set_gradient_clip(
                    clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))
                optimizer.minimize(train_ret["loss"])

        lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
            program=train_program, batch_size=args.batch_size)
        print("Theoretical memory usage in training: %.3f - %.3f %s" %
              (lower_mem, upper_mem, unit))

    if args.do_test:
        test_program = fluid.Program()
        with fluid.program_guard(test_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                test_pyreader, ernie_inputs, words, labels = ernie_pyreader(
                    args, pyreader_name='test_reader')
                test_pyreader.decorate_tensor_provider(
                    reader.data_generator(args.test_set,
                                          args.batch_size,
                                          phase='test',
                                          epoch=1,
                                          shuffle=False))
                # get ernie_embeddings
                embeddings = ernie_encoder(ernie_inputs,
                                           ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                test_ret = create_model(args,
                                        embeddings,
                                        labels=labels,
                                        is_prediction=False)

        test_program = test_program.clone(for_test=True)

    if args.do_infer:
        infer_program = fluid.Program()
        with fluid.program_guard(infer_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                infer_pyreader, ernie_inputs, words, labels = ernie_pyreader(
                    args, pyreader_name='infer_reader')
                infer_pyreader.decorate_tensor_provider(
                    reader.data_generator(args.infer_set,
                                          args.batch_size,
                                          phase='infer',
                                          epoch=1,
                                          shuffle=False))
                # get ernie_embeddings
                embeddings = ernie_encoder(ernie_inputs,
                                           ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                infer_ret = create_model(args,
                                         embeddings,
                                         labels=labels,
                                         is_prediction=True)
                infer_ret["words"] = words

        infer_program = infer_program.clone(for_test=True)

    exe.run(startup_prog)

    # load checkpoints
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)
        elif args.init_pretraining_params:
            utils.init_pretraining_params(exe, args.init_pretraining_params,
                                          startup_prog)
    elif args.do_test or args.do_infer:
        if not args.init_checkpoint:
            raise ValueError(
                "args 'init_checkpoint' should be set if only doing test or infer!"
            )
        utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)

    if args.do_train:
        train_pyreader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        while True:
            try:
                steps += 1
                if steps % args.skip_steps == 0:
                    fetch_list = [
                        train_ret["loss"],
                        train_ret["num_infer_chunks"],
                        train_ret["num_label_chunks"],
                        train_ret["num_correct_chunks"],
                    ]
                else:
                    fetch_list = []

                start_time = time.time()
                outputs = exe.run(program=train_program, fetch_list=fetch_list)
                end_time = time.time()
                if steps % args.skip_steps == 0:
                    loss, nums_infer, nums_label, nums_correct = outputs
                    train_ret["chunk_evaluator"].reset()
                    train_ret["chunk_evaluator"].update(
                        nums_infer, nums_label, nums_correct)
                    precision, recall, f1_score = train_ret[
                        "chunk_evaluator"].eval()
                    print(
                        "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f, "
                        "pyreader queue_size: %d " %
                        (steps, loss, precision, recall, f1_score,
                         end_time - start_time, train_pyreader.queue.size()))

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    print("\tsaving model as %s" % (save_path))
                    fluid.io.save_persistables(exe, save_path, train_program)

                if steps % args.validation_steps == 0:
                    # evaluate test set
                    if args.do_test:
                        evaluate(exe, test_program, test_pyreader, test_ret)

            except fluid.core.EOFException:
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on test set
    if args.do_test:
        evaluate(exe, test_program, test_pyreader, test_ret)

    if args.do_infer:
        # create dict
        id2word_dict = dict([(str(word_id), word)
                             for word, word_id in reader.vocab.items()])
        id2label_dict = dict([(str(label_id), label)
                              for label, label_id in reader.label_map.items()])
        Dataset = namedtuple("Dataset", ["id2word_dict", "id2label_dict"])
        dataset = Dataset(id2word_dict, id2label_dict)

        infer_pyreader.start()
        while True:
            try:
                (words, crf_decode) = exe.run(
                    infer_program,
                    fetch_list=[infer_ret["words"], infer_ret["crf_decode"]],
                    return_numpy=False)
                # User should notice that words had been clipped if long than args.max_seq_len
                results = utils.parse_result(words, crf_decode, dataset)
                for result in results:
                    print(result)
            except fluid.core.EOFException:
                infer_pyreader.reset()
                break
def main(args):
    """
    Main Function

    Sets up devices, reads the dictionaries, builds the data loaders and the
    graph, optionally restores a checkpoint, and then runs the phases selected
    by ``args.do_train``, ``args.do_eval`` and ``args.do_test`` (in that
    order).

    Args:
        args: parsed run options. Attributes read here: ``do_train``,
            ``do_eval``, ``do_test``, ``build_dict``, ``random_seed``,
            ``config_path``, ``use_cuda``, ``cpu_num``, ``data_dir`` and
            ``init_checkpoint``.

    Raises:
        ValueError: if none of ``do_train``, ``do_eval`` and ``do_test`` is
            set.

    Side effects:
        Assigns the module-level ``DEV_COUNT``; on the CPU path also sets the
        ``CPU_NUM`` environment variable.
    """
    global DEV_COUNT

    # Fail fast: reject a run with no phase selected before reading the
    # dictionaries or building the loaders and the graph.
    if not (args.do_train or args.do_eval or args.do_test):
        raise ValueError("For args `do_train`, `do_eval` and `do_test`, at "
                         "least one of them must be True.")

    startup_prog = fluid.default_startup_program()
    random.seed(args.random_seed)
    model_config = ConfigReader.read_conf(args.config_path)

    # `place` is the device list handed to the graph builder and to train();
    # `test_place` is a separate list requested with an explicit argument.
    if args.use_cuda:
        test_place = fluid.cuda_places(0)
        place = fluid.cuda_places()
        DEV_COUNT = len(place)
    else:
        test_place = fluid.cpu_places(1)
        # CPU_NUM is exported before cpu_places() is called without arguments
        # (presumably that call reads it -- confirm against the Paddle docs).
        os.environ['CPU_NUM'] = str(args.cpu_num)
        place = fluid.cpu_places()
        DEV_COUNT = args.cpu_num
    logger.info("Dev Num is %s", DEV_COUNT)
    exe = fluid.Executor(place[0])

    # NOTE: file names are appended to `args.data_dir` by plain string
    # concatenation, so `data_dir` has to end with a path separator.
    if args.do_train and args.build_dict:
        DataProcesser.build_dict(args.data_dir + "train.txt", args.data_dir)

    # read dict
    char_dict = DataProcesser.read_dict(args.data_dir + "char.dict")
    dict_dim = len(char_dict)
    intent_dict = DataProcesser.read_dict(args.data_dir + "domain.dict")
    # Reverse mapping (integer id -> intent name) passed to evaluate().
    id2intent = {int(value): key for key, value in intent_dict.items()}
    num_labels = len(intent_dict)

    # build model
    loader_res = build_data_loader(args, char_dict, intent_dict)
    build_res = build_graph(args, model_config, num_labels, dict_dim, place,
                            test_place, loader_res)
    build_res["place"] = place
    build_res["test_place"] = test_place

    exe.run(startup_prog)

    # The literal string "None" is treated the same as no checkpoint.
    if args.init_checkpoint and args.init_checkpoint != "None":
        # Best effort: a failed load is logged and the run continues with
        # whatever values the startup program produced.
        try:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog)
            logger.info("Load model from %s", args.init_checkpoint)
        except Exception as e:
            logger.exception(str(e))
            logger.error("Faild load model from %s [%s]",
                         args.init_checkpoint, str(e))

    build_strategy = fluid.compiler.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 1

    # add compiled prog
    if args.do_train:
        # Only the training program is compiled with data parallelism.
        train_compiler = fluid.compiler.CompiledProgram(build_res["train_prog"])
        build_res["compiled_prog"] = train_compiler.with_data_parallel(
            loss_name=build_res["cost"].name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    if args.do_test:
        build_res["test_compiled_prog"] = fluid.compiler.CompiledProgram(
            build_res["test_prog"])
    if args.do_eval:
        build_res["eval_compiled_prog"] = fluid.compiler.CompiledProgram(
            build_res["eval_prog"])

    # Run the requested phases: train, then eval, then test.
    if args.do_train:
        train(args, exe, build_res, place)
    if args.do_eval:
        evaluate(args, exe, build_res, "eval",
                 save_result=True, id2intent=id2intent)
    if args.do_test:
        evaluate(args, exe, build_res, "test",
                 save_result=True, id2intent=id2intent)