Ejemplo n.º 1
0
 def load(self, model_dir, is_checkpoint=False):
     """
     Restore variables from ``model_dir`` into ``self.program``.

     Args:
         model_dir: Location passed through to the loading helper.
         is_checkpoint: When truthy the variables are restored with
             ``init_checkpoint``; otherwise ``init_pretraining_params``
             is used.

     Returns:
         None.
     """
     # TODO: support dygraph.
     restore = init_checkpoint if is_checkpoint else init_pretraining_params
     restore(self.exe, model_dir, self.program)
Ejemplo n.º 2
0
def main(args):
    """
    Run the matching model over the 'dev' phase and collect its predictions.

    Args:
        args: Parsed options. The attributes read here are ``task_name``,
            ``data_dir``, ``vocab_path``, ``max_seq_len``, ``do_lower_case``,
            ``batch_size``, ``epoch``, ``use_cuda`` and ``init_checkpoint``.

    Returns:
        list of str: ``str(row[1])`` for every row of the fetched ``probs``
        output, in batch order.

    Raises:
        KeyError: If the lower-cased task name has no registered processor.
    """
    task_key = args.task_name.lower()
    processor_classes = {
        'match': reader.MatchProcessor,
    }
    processor_cls = processor_classes[task_key]

    processor = processor_cls(
        data_dir=args.data_dir,
        vocab_path=args.vocab_path,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case)
    label_count = len(processor.get_labels())
    batch_source = processor.data_generator(
        batch_size=args.batch_size,
        phase='dev',
        epoch=args.epoch,
        shuffle=False)

    program = fluid.default_main_program()
    # Only the feed order and the probability output are used below.
    feed_order, _loss, probs, _accuracy, _num_seqs = create_model(
        args, num_labels=label_count)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

    executor = fluid.Executor(place)
    executor.run(framework.default_startup_program())

    if args.init_checkpoint:
        init_pretraining_params(executor, args.init_checkpoint, program)

    feed_vars = []
    for name in feed_order:
        feed_vars.append(program.global_block().var(name))
    feeder = fluid.DataFeeder(feed_vars, place)

    predictions = []
    for batch in batch_source():
        fetched = executor.run(
            fetch_list=[probs],
            feed=feeder.feed(batch),
            return_numpy=True)
        # Column 1 of each row is the value reported for that example.
        predictions.extend(str(row[1]) for row in fetched[0])

    return predictions
Ejemplo n.º 3
0
    def _build_programs(self):
        """
        Build the static-graph programs and initialize their variables.

        When ``self.run_infer`` is set only ``infer_program`` is built;
        otherwise ``train_program`` and ``eval_program`` are built. In both
        cases the startup program is then executed and, when the matching
        attribute is not None, pretraining parameters and after that a
        checkpoint are loaded into the program that was just built.
        Only use in static graph mode.
        """
        self.startup_program = fluid.Program()

        if self.run_infer:
            # build infer program
            self.infer_program = fluid.Program()
            with fluid.program_guard(self.infer_program, self.startup_program), \
                    fluid.unique_name.guard():
                inputs = self._get_feed_dict(is_infer=True)
                self.infer_feed_dict = inputs
                outputs = self.forward(inputs, is_infer=True)
                self.infer_fetch_dict = self.infer(inputs, outputs)
            self.infer_program = self.infer_program.clone(for_test=True)
            built_program = self.infer_program
        else:
            # build train program
            self.train_program = fluid.Program()
            with fluid.program_guard(self.train_program, self.startup_program), \
                    fluid.unique_name.guard():
                inputs = self._get_feed_dict()
                self.feed_dict = inputs
                outputs = self.forward(inputs)
                metrics, statistics = self.get_metrics_and_statistics(inputs, outputs)

                # build eval program: cloned before self.optimize() runs
                self.eval_program = self.train_program.clone(for_test=True)
                self.eval_fetch_dict = {**metrics, **statistics}

                self.optimize(metrics)
                self.train_fetch_dict = metrics
            built_program = self.train_program

        # Shared tail: run the startup program, then load saved variables.
        self.exe.run(self.startup_program)
        if self.init_pretraining_params is not None:
            init_pretraining_params(self.exe, self.init_pretraining_params, built_program)
        if self.init_checkpoint is not None:
            init_checkpoint(self.exe, self.init_checkpoint, built_program)
Ejemplo n.º 4
0
def main(args):
    """
    Run the matching model over the 'test' phase and print its predictions.

    For every row of the fetched ``probs`` output, ``row[1]`` is printed on
    its own line.

    Args:
        args: Parsed options. The attributes read here are ``task_name``,
            ``data_dir``, ``vocab_path``, ``max_seq_len``, ``do_lower_case``,
            ``batch_size``, ``use_cuda`` and ``init_checkpoint``.
    """
    task_key = args.task_name.lower()
    processor = reader.MatchProcessor(
        data_dir=args.data_dir,
        task_name=task_key,
        vocab_path=args.vocab_path,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case)

    label_count = len(processor.get_labels())
    batch_source = processor.data_generator(
        batch_size=args.batch_size, phase='test', epoch=1, shuffle=False)
    # The example count is queried but not used afterwards.
    _test_example_count = processor.get_num_examples(phase='test')
    program = fluid.default_main_program()

    feed_order, _loss, probs, _accuracy, _num_seqs = create_model(
        args, num_labels=label_count, is_prediction=True)

    # The device count is computed for both device kinds but not used below.
    if not args.use_cuda:
        place = fluid.CPUPlace()
        _device_count = int(
            os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    else:
        place = fluid.CUDAPlace(0)
        _device_count = fluid.core.get_cuda_device_count()

    executor = fluid.Executor(place)
    executor.run(framework.default_startup_program())

    if args.init_checkpoint:
        init_pretraining_params(executor, args.init_checkpoint, program)

    feed_vars = []
    for name in feed_order:
        feed_vars.append(program.global_block().var(name))
    feeder = fluid.DataFeeder(feed_vars, place)

    for batch in batch_source():
        fetched = executor.run(
            fetch_list=[probs], feed=feeder.feed(batch), return_numpy=True)
        for row in fetched[0]:
            print(row[1])
def main(args):
    """
    Train, evaluate and/or run inference with an ERNIE sequence-labeling model.

    Depending on ``args.do_train``, ``args.do_test`` and ``args.do_infer``
    this builds a train, test and/or infer program on one shared startup
    program, runs the startup program, loads saved variables, and then
    executes the requested phases in the order train, test, infer.

    Args:
        args: Parsed options. NOTE(review): the value passed in is replaced
            on the first line by ``parser.parse_args()``, so the argument is
            effectively ignored -- confirm whether that is intended.

    Raises:
        ValueError: If none of ``do_train``, ``do_test`` and ``do_infer`` is
            set, or if only test/infer is requested without
            ``init_checkpoint``.
    """
    args = parser.parse_args()
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.SequenceLabelReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        random_seed=args.random_seed)

    # The message names the same three flags that the condition checks.
    if not (args.do_train or args.do_test or args.do_infer):
        raise ValueError("For args `do_train`, `do_test` and `do_infer`, at "
                         "least one of them must be True.")

    # One startup program is shared by every program built below.
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        num_train_examples = reader.get_num_examples(args.train_set)
        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                train_pyreader, ernie_inputs, words, labels = ernie_pyreader(
                    args, pyreader_name='train_reader')
                train_pyreader.decorate_tensor_provider(
                    reader.data_generator(args.train_set,
                                          args.batch_size,
                                          args.epoch,
                                          shuffle=True,
                                          phase="train"))
                # get ernie_embeddings
                embeddings = ernie_encoder(ernie_inputs,
                                           ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                train_ret = create_model(args,
                                         embeddings,
                                         labels=labels,
                                         is_prediction=False)

                optimizer = fluid.optimizer.Adam(learning_rate=args.lr)
                fluid.clip.set_gradient_clip(
                    clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))
                optimizer.minimize(train_ret["loss"])

        lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
            program=train_program, batch_size=args.batch_size)
        print("Theoretical memory usage in training: %.3f - %.3f %s" %
              (lower_mem, upper_mem, unit))

    if args.do_test:
        test_program = fluid.Program()
        with fluid.program_guard(test_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                test_pyreader, ernie_inputs, words, labels = ernie_pyreader(
                    args, pyreader_name='test_reader')
                test_pyreader.decorate_tensor_provider(
                    reader.data_generator(args.test_set,
                                          args.batch_size,
                                          phase='test',
                                          epoch=1,
                                          shuffle=False))
                # get ernie_embeddings
                embeddings = ernie_encoder(ernie_inputs,
                                           ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                test_ret = create_model(args,
                                        embeddings,
                                        labels=labels,
                                        is_prediction=False)

        test_program = test_program.clone(for_test=True)

    if args.do_infer:
        infer_program = fluid.Program()
        with fluid.program_guard(infer_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                infer_pyreader, ernie_inputs, words, labels = ernie_pyreader(
                    args, pyreader_name='infer_reader')
                infer_pyreader.decorate_tensor_provider(
                    reader.data_generator(args.infer_set,
                                          args.batch_size,
                                          phase='infer',
                                          epoch=1,
                                          shuffle=False))
                # get ernie_embeddings
                embeddings = ernie_encoder(ernie_inputs,
                                           ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                infer_ret = create_model(args,
                                         embeddings,
                                         labels=labels,
                                         is_prediction=True)
                # keep the input words so they can be fetched with the tags
                infer_ret["words"] = words

        infer_program = infer_program.clone(for_test=True)

    exe.run(startup_prog)

    # load checkpoints
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)
        elif args.init_pretraining_params:
            utils.init_pretraining_params(exe, args.init_pretraining_params,
                                          startup_prog)
    elif args.do_test or args.do_infer:
        # Without training, a checkpoint is the only source of weights.
        if not args.init_checkpoint:
            raise ValueError(
                "args 'init_checkpoint' should be set if only doing test or infer!"
            )
        utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)

    if args.do_train:
        train_pyreader.start()
        steps = 0
        while True:
            try:
                steps += 1
                # Metrics are fetched only on logging steps.
                if steps % args.skip_steps == 0:
                    fetch_list = [
                        train_ret["loss"],
                        train_ret["num_infer_chunks"],
                        train_ret["num_label_chunks"],
                        train_ret["num_correct_chunks"],
                    ]
                else:
                    fetch_list = []

                start_time = time.time()
                outputs = exe.run(program=train_program, fetch_list=fetch_list)
                end_time = time.time()
                if steps % args.skip_steps == 0:
                    loss, nums_infer, nums_label, nums_correct = outputs
                    # The evaluator is reset first, so P/R/F1 cover this batch only.
                    train_ret["chunk_evaluator"].reset()
                    train_ret["chunk_evaluator"].update(
                        nums_infer, nums_label, nums_correct)
                    precision, recall, f1_score = train_ret[
                        "chunk_evaluator"].eval()
                    print(
                        "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f, "
                        "pyreader queue_size: %d " %
                        (steps, loss, precision, recall, f1_score,
                         end_time - start_time, train_pyreader.queue.size()))

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    print("\tsaving model as %s" % (save_path))
                    fluid.io.save_persistables(exe, save_path, train_program)

                if steps % args.validation_steps == 0:
                    # evaluate test set
                    if args.do_test:
                        evaluate(exe, test_program, test_pyreader, test_ret)

            except fluid.core.EOFException:
                # The reader is exhausted: save a final checkpoint and stop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on test set
    if args.do_test:
        evaluate(exe, test_program, test_pyreader, test_ret)

    if args.do_infer:
        # create dict: reverse the reader's maps, keyed by the stringified id
        id2word_dict = {
            str(word_id): word
            for word, word_id in reader.vocab.items()
        }
        id2label_dict = {
            str(label_id): label
            for label, label_id in reader.label_map.items()
        }
        Dataset = namedtuple("Dataset", ["id2word_dict", "id2label_dict"])
        dataset = Dataset(id2word_dict, id2label_dict)

        infer_pyreader.start()
        while True:
            try:
                (words, crf_decode) = exe.run(
                    infer_program,
                    fetch_list=[infer_ret["words"], infer_ret["crf_decode"]],
                    return_numpy=False)
                # User should notice that words had been clipped if longer than args.max_seq_len
                results = utils.parse_result(words, crf_decode, dataset)
                for result in results:
                    print(result)
            except fluid.core.EOFException:
                infer_pyreader.reset()
                break
Ejemplo n.º 6
0
    def _build_programs(self):
        """
        Build the static-graph programs and initialize their variables.

        With ``self.run_infer`` set, only ``infer_program`` is built. Otherwise
        ``train_program`` and ``eval_program`` are built, preceded by the
        distributed-strategy setup when ``self.is_distributed`` is set.
        ``self.program`` refers to whichever program was built here.

        Afterwards the startup program is executed. Pretraining parameters
        are loaded when ``self.init_pretraining_params`` is not ``""``; only
        if it is ``""`` is ``self.init_checkpoint`` considered.
        Only use in static graph mode.
        """
        if self.run_infer:
            self.startup_program = fluid.Program()
            # build infer program
            self.infer_program = fluid.Program()
            with fluid.program_guard(self.infer_program, self.startup_program), \
                    fluid.unique_name.guard():
                inputs = self._get_feed_dict(is_infer=True)
                self.infer_feed_dict = inputs
                outputs = self.forward(inputs, is_infer=True)
                self.infer_fetch_dict = self.infer(inputs, outputs)
            self.infer_program = self.infer_program.clone(for_test=True)

            self.program = self.infer_program
        else:
            if self.is_distributed:
                executor_options = fluid.ExecutionStrategy()
                executor_options.use_experimental_executor = True
                executor_options.num_threads = 4
                executor_options.num_iteration_per_drop_scope = 1

                strategy = DistributedStrategy()
                strategy.exec_strategy = executor_options
                strategy.nccl_comm_num = 1
                strategy.fuse_all_reduce_ops = True
                if self.use_recompute:
                    strategy.forward_recompute = True
                    strategy.enable_sequential_execution = True
                if self.use_amp:
                    strategy.use_amp = True
                    strategy.amp_loss_scaling = self.amp_loss_scaling
                self.dist_strategy = strategy

            self.startup_program = fluid.Program()
            # build train program
            self.train_program = fluid.Program()
            with fluid.program_guard(self.train_program, self.startup_program), \
                    fluid.unique_name.guard():
                inputs = self._get_feed_dict()
                self.feed_dict = inputs
                outputs = self.forward(inputs)
                if self.is_distributed and self.use_recompute:
                    # the forward pass supplies the recompute checkpoints
                    self.dist_strategy.recompute_checkpoints = outputs["checkpoints"]
                metrics, statistics = self.get_metrics_and_statistics(
                    inputs, outputs)

                # build eval program: cloned before self.optimize() runs
                self.eval_program = self.train_program.clone(for_test=True)
                self.eval_fetch_dict = {**metrics, **statistics}

                metrics["scheduled_lr"] = self.optimize(metrics)
                self.train_fetch_dict = metrics

            # self.program keeps the program built above; in distributed mode
            # train_program is then replaced by fleet.main_program.
            self.program = self.train_program
            if self.is_distributed:
                self.train_program = fleet.main_program

        self.exe.run(self.startup_program)
        if self.init_pretraining_params != "":
            init_pretraining_params(
                self.exe, self.init_pretraining_params, self.program)
        elif self.init_checkpoint != "":
            init_checkpoint(self.exe, self.init_checkpoint, self.program)
Ejemplo n.º 7
0
def do_train(args):
    """
    Train the ERNIE-based model, with periodic logging, saving and evaluation.

    Builds the train program, clones a test program from it before the
    optimizer is attached, runs the startup program, optionally loads a
    checkpoint or pretraining parameters, and then iterates over the train
    reader for ``args.epoch`` epochs. A final save is performed after the
    last epoch.

    Args:
        args: Parsed options. Among others this reads ``ernie_config_path``,
            ``use_cuda``, ``cpu_num``, ``random_seed``, ``train_data``,
            ``test_data``, ``base_learning_rate``, ``batch_size``,
            ``init_checkpoint``, ``init_pretraining_params``, ``epoch``,
            ``print_steps``, ``save_steps``, ``validation_steps`` and
            ``model_save_dir``.
    """

    def checkpoint_path(step):
        # Location used for both the periodic and the final save.
        return os.path.join(args.model_save_dir, "step_" + str(step),
                            "checkpoint")

    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if not args.use_cuda:
        # The requested CPU count is capped at what the machine reports.
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if dev_count < args.cpu_num:
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()
    else:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = 1

    exe = fluid.Executor(place)

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_prog), \
            fluid.unique_name.guard():
        # user defined model based on ernie embeddings
        train_ret = creator.create_ernie_model(args, ernie_config)

        # ernie pyreader
        train_pyreader = creator.create_pyreader(
            args,
            file_name=args.train_data,
            feed_list=train_ret['feed_list'],
            model="ernie",
            place=place)

        # The test program is cloned before the optimizer is attached.
        test_program = train_program.clone(for_test=True)
        test_pyreader = creator.create_pyreader(
            args,
            file_name=args.test_data,
            feed_list=train_ret['feed_list'],
            model="ernie",
            place=place)

        grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
        optimizer = fluid.optimizer.Adam(
            learning_rate=args.base_learning_rate, grad_clip=grad_clip)
        optimizer.minimize(train_ret["avg_cost"])

    memory_estimate = fluid.contrib.memory_usage(
        program=train_program, batch_size=args.batch_size)
    lower_mem, upper_mem, unit = memory_estimate
    print("Theoretical memory usage in training: %.3f - %.3f %s" %
          (lower_mem, upper_mem, unit))
    print("Device count: %d" % dev_count)

    exe.run(startup_prog)
    # load checkpoints
    if args.init_checkpoint and args.init_pretraining_params:
        print("WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
              "both are set! Only arg 'init_checkpoint' is made valid.")
    if args.init_checkpoint:
        utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)
    elif args.init_pretraining_params:
        utils.init_pretraining_params(exe, args.init_pretraining_params,
                                      startup_prog)

    if dev_count > 1 and not args.use_cuda:
        device_kind = "GPU" if args.use_cuda else "CPU"
        print("%d %s are used to train model" % (dev_count, device_kind))

        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        build_strategy = fluid.BuildStrategy()
        compiled_prog = fluid.compiler.CompiledProgram(train_program)
        compiled_prog = compiled_prog.with_data_parallel(
            loss_name=train_ret['avg_cost'].name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    # start training
    steps = 0
    for _epoch in range(args.epoch):
        for data in train_pyreader():
            steps += 1
            # Metrics are fetched only on logging steps.
            log_this_step = steps % args.print_steps == 0
            fetch_list = [
                train_ret["avg_cost"],
                train_ret["precision"],
                train_ret["recall"],
                train_ret["f1_score"],
            ] if log_this_step else []

            start_time = time.time()
            outputs = exe.run(program=compiled_prog,
                              feed=data[0],
                              fetch_list=fetch_list)
            end_time = time.time()

            if log_this_step:
                loss, precision, recall, f1_score = list(
                    map(np.mean, outputs))
                print(
                    "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f, "
                    "pyreader queue_size: %d " %
                    (steps, loss, precision, recall, f1_score,
                     end_time - start_time, train_pyreader.queue.size()))

            if steps % args.save_steps == 0:
                save_path = checkpoint_path(steps)
                print("\tsaving model as %s" % (save_path))
                fluid.save(train_program, save_path)

            if steps % args.validation_steps == 0:
                evaluate(exe, test_program, test_pyreader, train_ret)

    # Final save once all epochs have completed.
    fluid.save(train_program, checkpoint_path(steps))