Beispiel #1
0
def do_save_inference_model(args):
    """Build the evaluation network and export it as an inference model.

    Restores parameters from a fine-tuned params directory or a pretrained
    model (exactly one of the two must be supplied via ``args``), then
    serializes the program and weights into ``args.inference_model_dir``.
    """
    task_name = args.task_name.lower()
    paradigm_inst = define_paradigm.Paradigm(task_name)

    # Map each supported task name to its dataset processor class.
    processors = {
        'udc': reader.UDCProcessor,
        'swda': reader.SWDAProcessor,
        'mrda': reader.MRDAProcessor,
        'atis_slot': reader.ATISSlotProcessor,
        'atis_intent': reader.ATISIntentProcessor,
        'dstc2': reader.DSTC2Processor,
    }

    test_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(test_prog, startup_prog):
        # Fix the seed on both programs for reproducible graph construction.
        test_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():

            num_labels = len(processors[task_name].get_labels())

            # Network inputs: every token-level tensor shares the
            # [max_seq_len, 1] layout expected by the encoder.
            src_ids = fluid.layers.data(name='src_ids',
                                        shape=[args.max_seq_len, 1],
                                        dtype='int64')
            pos_ids = fluid.layers.data(name='pos_ids',
                                        shape=[args.max_seq_len, 1],
                                        dtype='int64')
            sent_ids = fluid.layers.data(name='sent_ids',
                                         shape=[args.max_seq_len, 1],
                                         dtype='int64')
            input_mask = fluid.layers.data(name='input_mask',
                                           shape=[args.max_seq_len, 1],
                                           dtype='float32')

            # Label layout depends on the task paradigm: sequence labeling
            # needs one label per token, multi-label tasks need one slot
            # per class, and plain classification needs a single scalar.
            if args.task_name == 'atis_slot':
                label_shape = [args.max_seq_len]
            elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']:
                label_shape = [num_labels]
            else:
                label_shape = [1]
            labels = fluid.layers.data(name='labels',
                                       shape=label_shape,
                                       dtype='int64')

            input_field = InputField(
                [src_ids, pos_ids, sent_ids, input_mask, labels])

            net_outputs = create_net(is_training=False,
                                     model_input=input_field,
                                     num_labels=num_labels,
                                     paradigm_inst=paradigm_inst,
                                     args=args)
            probs = net_outputs.get("probs", None)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Exactly one parameter source must be configured.
    assert (args.init_from_params) or (args.init_from_pretrain_model)

    if args.init_from_params:
        save_load_io.init_from_params(args, exe, test_prog)
    elif args.init_from_pretrain_model:
        save_load_io.init_from_pretrain_model(args, exe, test_prog)

    # Export the graph and weights; labels are not fed at inference time.
    fluid.io.save_inference_model(args.inference_model_dir,
                                  feeded_var_names=[
                                      input_field.src_ids.name,
                                      input_field.pos_ids.name,
                                      input_field.sent_ids.name,
                                      input_field.input_mask.name
                                  ],
                                  target_vars=[probs],
                                  executor=exe,
                                  main_program=test_prog,
                                  model_filename="model.pdmodel",
                                  params_filename="params.pdparams")

    print("save inference model at %s" % (args.inference_model_dir))
Beispiel #2
0
def do_predict(args):
    """Run test-set prediction for the configured dialogue task.

    Builds the evaluation network, loads parameters from
    ``args.init_from_params``, streams batches through a DataLoader,
    and writes one tab-separated ``index<TAB>tags`` line per example to
    ``args.output_prediction_file``.
    """

    task_name = args.task_name.lower()
    paradigm_inst = define_paradigm.Paradigm(task_name)
    # Resolve the per-task post-processing function that turns raw
    # probabilities into predicted tags/labels.
    pred_inst = define_predict_pack.DefinePredict()
    pred_func = getattr(pred_inst, pred_inst.task_map[task_name])

    # Map each supported task name to its dataset processor class.
    processors = {
        'udc': reader.UDCProcessor,
        'swda': reader.SWDAProcessor,
        'mrda': reader.MRDAProcessor,
        'atis_slot': reader.ATISSlotProcessor,
        'atis_intent': reader.ATISIntentProcessor,
        'dstc2': reader.DSTC2Processor,
    }

    test_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(test_prog, startup_prog):
        # Fix the seed on both programs for reproducible graph construction.
        test_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():

            # define inputs of the network; batch dimension is dynamic (-1)
            num_labels = len(processors[task_name].get_labels())

            src_ids = fluid.data(name='src_ids',
                                 shape=[-1, args.max_seq_len],
                                 dtype='int64')
            pos_ids = fluid.data(name='pos_ids',
                                 shape=[-1, args.max_seq_len],
                                 dtype='int64')
            sent_ids = fluid.data(name='sent_ids',
                                  shape=[-1, args.max_seq_len],
                                  dtype='int64')
            input_mask = fluid.data(name='input_mask',
                                    shape=[-1, args.max_seq_len, 1],
                                    dtype='float32')
            # Label layout depends on the task paradigm: per-token labels
            # for slot filling, per-class slots for multi-label tasks,
            # a single scalar for classification.
            if args.task_name == 'atis_slot':
                labels = fluid.data(name='labels',
                                    shape=[-1, args.max_seq_len],
                                    dtype='int64')
            elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']:
                labels = fluid.data(name='labels',
                                    shape=[-1, num_labels],
                                    dtype='int64')
            else:
                labels = fluid.data(name='labels',
                                    shape=[-1, 1],
                                    dtype='int64')

            input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
            input_field = InputField(input_inst)
            data_reader = fluid.io.DataLoader.from_generator(
                feed_list=input_inst, capacity=4, iterable=False)

            results = create_net(is_training=False,
                                 model_input=input_field,
                                 num_labels=num_labels,
                                 paradigm_inst=paradigm_inst,
                                 args=args)

            probs = results.get("probs", None)
            fetch_list = [probs.name]

    # for_test=True switches operators' is_test attribute to True
    test_prog = test_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Prediction requires fine-tuned parameters to load from.
    assert (args.init_from_params)

    if args.init_from_params:
        fluid.load(test_prog, args.init_from_params)

    compiled_test_prog = fluid.CompiledProgram(test_prog)

    processor = processors[task_name](data_dir=args.data_dir,
                                      vocab_path=args.vocab_path,
                                      max_seq_len=args.max_seq_len,
                                      do_lower_case=args.do_lower_case,
                                      in_tokens=args.in_tokens,
                                      task_name=task_name,
                                      random_seed=args.random_seed)
    batch_generator = processor.data_generator(batch_size=args.batch_size,
                                               phase='test',
                                               shuffle=False)

    data_reader.set_batch_generator(batch_generator, places=place)
    data_reader.start()

    # Drain the loader until EOF, accumulating per-example probabilities.
    all_results = []
    while True:
        try:
            results = exe.run(compiled_test_prog, fetch_list=fetch_list)
            all_results.extend(results[0])
        except fluid.core.EOFException:
            data_reader.reset()
            break

    np.set_printoptions(precision=4, suppress=True)
    print("Write the predicted results into the output_prediction_file")

    # BUGFIX: open the output file in a context manager so it is always
    # flushed and closed (the original handle was never closed).
    with io.open(args.output_prediction_file, 'w', encoding="utf8") as fw:
        if task_name not in ['atis_slot']:
            for index, result in enumerate(all_results):
                tags = pred_func(result)
                fw.write("%s%s%s%s" % (index, tab_tok, tags, rt_tok))
        else:
            # Slot filling decodes all sequences at once.
            tags = pred_func(all_results, args.max_seq_len)
            for index, tag in enumerate(tags):
                fw.write("%s%s%s%s" % (index, tab_tok, tag, rt_tok))
Beispiel #3
0
def do_train(args):
    """Train the dialogue model for the configured task.

    Builds the training network, optionally resumes from a checkpoint or
    initializes from a pretrained model, runs the epoch loop with periodic
    logging and checkpointing, and emits CE (continuous-evaluation) KPI
    lines when ``args.enable_ce`` is set.
    """

    task_name = args.task_name.lower()
    paradigm_inst = define_paradigm.Paradigm(task_name)

    # Map each supported task name to its dataset processor class.
    processors = {
        'udc': reader.UDCProcessor,
        'swda': reader.SWDAProcessor,
        'mrda': reader.MRDAProcessor,
        'atis_slot': reader.ATISSlotProcessor,
        'atis_intent': reader.ATISIntentProcessor,
        'dstc2': reader.DSTC2Processor,
    }

    train_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(train_prog, startup_prog):
        # Fix the seed on both programs for reproducible graph construction.
        train_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed
        with fluid.unique_name.guard():
            num_labels = len(processors[task_name].get_labels())

            # Network inputs: every token-level tensor shares the
            # [max_seq_len, 1] layout expected by the encoder.
            src_ids = fluid.layers.data(name='src_ids',
                                        shape=[args.max_seq_len, 1],
                                        dtype='int64')
            pos_ids = fluid.layers.data(name='pos_ids',
                                        shape=[args.max_seq_len, 1],
                                        dtype='int64')
            sent_ids = fluid.layers.data(name='sent_ids',
                                         shape=[args.max_seq_len, 1],
                                         dtype='int64')
            input_mask = fluid.layers.data(name='input_mask',
                                           shape=[args.max_seq_len, 1],
                                           dtype='float32')
            # Label layout depends on the task paradigm: per-token labels
            # for slot filling, per-class slots for dstc2, a single scalar
            # for plain classification.
            if args.task_name == 'atis_slot':
                labels = fluid.layers.data(name='labels',
                                           shape=[args.max_seq_len],
                                           dtype='int64')
            elif args.task_name in ['dstc2']:
                labels = fluid.layers.data(name='labels',
                                           shape=[num_labels],
                                           dtype='int64')
            else:
                labels = fluid.layers.data(name='labels',
                                           shape=[1],
                                           dtype='int64')

            input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
            input_field = InputField(input_inst)
            data_reader = fluid.io.PyReader(feed_list=input_inst,
                                            capacity=4,
                                            iterable=False)
            processor = processors[task_name](data_dir=args.data_dir,
                                              vocab_path=args.vocab_path,
                                              max_seq_len=args.max_seq_len,
                                              do_lower_case=args.do_lower_case,
                                              in_tokens=args.in_tokens,
                                              task_name=task_name,
                                              random_seed=args.random_seed)

            results = create_net(is_training=True,
                                 model_input=input_field,
                                 num_labels=num_labels,
                                 paradigm_inst=paradigm_inst,
                                 args=args)

            loss = results.get("loss", None)
            probs = results.get("probs", None)
            accuracy = results.get("accuracy", None)
            num_seqs = results.get("num_seqs", None)

            # Keep fetched variables alive across iterations.
            loss.persistable = True
            probs.persistable = True
            if accuracy:
                accuracy.persistable = True
            num_seqs.persistable = True

            if args.use_cuda:
                dev_count = fluid.core.get_cuda_device_count()
            else:
                dev_count = int(
                    os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

            batch_generator = processor.data_generator(
                batch_size=args.batch_size, phase='train', shuffle=True)
            num_train_examples = processor.get_num_examples(phase='train')

            # In token mode, batch_size counts tokens, so divide by
            # max_seq_len to approximate examples per batch.
            if args.in_tokens:
                max_train_steps = args.epoch * num_train_examples // (
                    args.batch_size // args.max_seq_len) // dev_count
            else:
                max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

            warmup_steps = int(max_train_steps * args.warmup_proportion)
            print("Num train examples: %d" % num_train_examples)
            print("Max train steps: %d" % max_train_steps)
            print("Num warmup steps: %d" % warmup_steps)

            # Returns the scheduled learning-rate variable (fetched below
            # for logging when warmup is enabled).
            optimizer = optimization(loss=loss,
                                     warmup_steps=warmup_steps,
                                     num_train_steps=max_train_steps,
                                     learning_rate=args.learning_rate,
                                     train_program=train_prog,
                                     startup_prog=startup_prog,
                                     weight_decay=args.weight_decay,
                                     scheduler=args.lr_scheduler,
                                     use_fp16=args.use_fp16,
                                     loss_scaling=args.loss_scaling)

    data_reader.decorate_batch_generator(batch_generator)

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Checkpoint and pretrain-model initialization are mutually exclusive.
    assert (args.init_from_checkpoint == "") or (args.init_from_pretrain_model
                                                 == "")

    # init from some checkpoint, to resume the previous training
    if args.init_from_checkpoint:
        save_load_io.init_from_checkpoint(args, exe, train_prog)

    # init from some pretrain models, to better solve the current task
    if args.init_from_pretrain_model:
        save_load_io.init_from_pretrain_model(args, exe, train_prog)

    build_strategy = fluid.compiler.BuildStrategy()
    build_strategy.enable_inplace = True

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)

    # start training
    steps = 0
    time_begin = time.time()
    ce_info = []
    for epoch_step in range(args.epoch):
        data_reader.start()
        while True:
            try:
                steps += 1
                # Only fetch metrics on logging steps; the fetch list
                # shape must match the unpacking below.
                if steps % args.print_steps == 0:
                    if warmup_steps <= 0:
                        if accuracy is not None:
                            fetch_list = [
                                loss.name, accuracy.name, num_seqs.name
                            ]
                        else:
                            fetch_list = [loss.name, num_seqs.name]
                    else:
                        if accuracy is not None:
                            fetch_list = [
                                loss.name, accuracy.name, optimizer.name,
                                num_seqs.name
                            ]
                        else:
                            fetch_list = [
                                loss.name, optimizer.name, num_seqs.name
                            ]
                else:
                    fetch_list = []

                outputs = exe.run(compiled_train_prog, fetch_list=fetch_list)

                if steps % args.print_steps == 0:
                    if warmup_steps <= 0:
                        if accuracy is not None:
                            np_loss, np_acc, np_num_seqs = outputs
                        else:
                            np_loss, np_num_seqs = outputs
                    else:
                        if accuracy is not None:
                            np_loss, np_acc, np_lr, np_num_seqs = outputs
                        else:
                            np_loss, np_lr, np_num_seqs = outputs

                    time_end = time.time()
                    used_time = time_end - time_begin
                    current_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                                 time.localtime(time.time()))
                    if accuracy is not None:
                        print(
                            "%s epoch: %d, step: %d, ave loss: %f, "
                            "ave acc: %f, speed: %f steps/s" %
                            (current_time, epoch_step, steps, np.mean(np_loss),
                             np.mean(np_acc), args.print_steps / used_time))
                        ce_info.append([
                            np.mean(np_loss),
                            np.mean(np_acc), args.print_steps / used_time
                        ])
                    else:
                        print("%s epoch: %d, step: %d, ave loss: %f, "
                              "speed: %f steps/s" %
                              (current_time, epoch_step, steps,
                               np.mean(np_loss), args.print_steps / used_time))
                        ce_info.append(
                            [np.mean(np_loss), args.print_steps / used_time])
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = "step_" + str(steps)
                    if args.save_checkpoint:
                        save_load_io.save_checkpoint(args, exe, train_prog,
                                                     save_path)
                    if args.save_param:
                        save_load_io.save_param(args, exe, train_prog,
                                                save_path)

            except fluid.core.EOFException:
                data_reader.reset()
                break
    if args.save_checkpoint:
        save_load_io.save_checkpoint(args, exe, train_prog, "step_final")
    if args.save_param:
        save_load_io.save_param(args, exe, train_prog, "step_final")

    def get_cards():
        # Number of visible GPU cards, 0 when CUDA_VISIBLE_DEVICES is unset.
        num = 0
        cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
        print("test_cards", cards)
        if cards != '':
            num = len(cards.split(","))
        return num

    if args.enable_ce:
        card_num = get_cards()
        print("test_card_num", card_num)
        ce_loss = 0
        ce_acc = 0
        ce_time = 0
        # BUGFIX: catch only the expected failure (too few logged entries,
        # or a loss-only entry without an accuracy column) instead of a
        # bare except that also swallowed KeyboardInterrupt/SystemExit.
        try:
            ce_loss = ce_info[-2][0]
            ce_acc = ce_info[-2][1]
            ce_time = ce_info[-2][2]
        except IndexError:
            print("ce info error")
        print("kpis\teach_step_duration_%s_card%s\t%s" %
              (task_name, card_num, ce_time))
        print("kpis\ttrain_loss_%s_card%s\t%f" %
              (task_name, card_num, ce_loss))
        print("kpis\ttrain_acc_%s_card%s\t%f" % (task_name, card_num, ce_acc))