Example #1
File: main.py Project: yxl-0713/models
def infer():
    with fluid.dygraph.guard(place):
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)

        infer_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='infer',
            epoch=args.epoch,
            shuffle=False)
        if args.model_type == 'cnn_net':
            model_infer = nets.CNN(args.vocab_size, args.batch_size,
                                   args.padding_size)
        elif args.model_type == 'bow_net':
            model_infer = nets.BOW(args.vocab_size, args.batch_size,
                                   args.padding_size)
        elif args.model_type == 'gru_net':
            model_infer = nets.GRU(args.vocab_size, args.batch_size,
                                   args.padding_size)
        elif args.model_type == 'bigru_net':
            model_infer = nets.BiGRU(args.vocab_size, args.batch_size,
                                     args.padding_size)
        print('Running inference ...')
        restore, _ = fluid.load_dygraph(args.checkpoints)
        model_infer.set_dict(restore)
        model_infer.eval()
        total_acc, total_num_seqs = [], []
        steps = 0
        time_begin = time.time()
        for batch_id, data in enumerate(infer_data_generator()):
            steps += 1
            np_doc = np.array([
                np.pad(x[0][0:args.padding_size],
                       (0, args.padding_size - len(x[0][0:args.padding_size])),
                       'constant',
                       constant_values=(args.vocab_size)) for x in data
            ]).astype('int64').reshape(-1)
            doc = to_variable(np_doc)
            label = to_variable(
                np.array([x[1] for x in data
                          ]).astype('int64').reshape(args.batch_size, 1))
            _, _, acc = model_infer(doc, label)
            mask = (np_doc != args.vocab_size).astype('int32')
            word_num = np.sum(mask)
            total_acc.append(acc.numpy() * word_num)
            total_num_seqs.append(word_num)
        time_end = time.time()
        used_time = time_end - time_begin
        print("Final infer result: ave acc: %f, speed: %f steps/s" %
              (np.sum(total_acc) / np.sum(total_num_seqs), steps / used_time))
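Note on the preprocessing used above (and in the train() examples below): every document is truncated or right-padded to args.padding_size, with args.vocab_size serving as the pad id, and the same id is later matched against to count real tokens. A minimal standalone sketch of that pattern, with made-up values for padding_size, vocab_size, and the sample sequence:

import numpy as np

padding_size, vocab_size = 8, 33256  # hypothetical stand-ins for args.*
seq = [12, 7, 405]                   # one tokenized document

padded = np.pad(seq[0:padding_size],
                (0, padding_size - len(seq[0:padding_size])),
                'constant',
                constant_values=vocab_size).astype('int64')
mask = (padded != vocab_size).astype('int32')  # 1 for real tokens, 0 for padding
print(padded)      # [   12     7   405 33256 33256 33256 33256 33256]
print(mask.sum())  # 3, i.e. word_num for this document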
Example #2
def model_loader(model_type, dataset):
    """
    This function loads model from net.py
    """
    if model_type == 'MP':
        net = nets.multilayer_perceptron(dataset)
    elif model_type == 'CNN':
        net = nets.CNN(dataset)
    elif model_type == 'CNN_kim':
        net = nets.CNN_kim(dataset)
    elif model_type == 'CNN_deep':
        net = nets.CNN_deep(dataset)
    else:
        raise ValueError(f'model_type: {model_type} is not recognized.')

    return net
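Since the function is a pure dispatch over model_type, the if/elif chain could equally be written as a lookup table; a sketch assuming the same nets module is importable:

MODELS = {
    'MP': nets.multilayer_perceptron,
    'CNN': nets.CNN,
    'CNN_kim': nets.CNN_kim,
    'CNN_deep': nets.CNN_deep,
}

def model_loader(model_type, dataset):
    try:
        return MODELS[model_type](dataset)
    except KeyError:
        raise ValueError(f'model_type: {model_type} is not recognized.')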
Example #3
def train():
    with fluid.dygraph.guard(place):
        if args.benchmark:
            args.epoch = 1
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())

        num_train_examples = processor.get_num_examples(phase="train")

        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        train_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='train',
            epoch=args.epoch,
            shuffle=True)

        eval_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='dev',
            epoch=args.epoch,
            shuffle=False)

        cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
                           args.padding_size)

        sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
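        # Note: the next line consumes a full pass of train_data_generator just to count batches.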
        length = len(list(enumerate(train_data_generator())))
        for eop in range(args.epoch):
            time_begin = time.time()
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(length,
                                     batch_time,
                                     data_time,
                                     prefix="epoch: [{}]".format(eop))
            end = time.time()
            for batch_id, data in enumerate(train_data_generator()):
                data_time.update(time.time() - end)
                steps += 1
                doc = to_variable(
                    np.array([
                        np.pad(x[0][0:args.padding_size],
                               (0, args.padding_size -
                                len(x[0][0:args.padding_size])),
                               'constant',
                               constant_values=(args.vocab_size)) for x in data
                    ]).astype('int64').reshape(-1, 1))

                label = to_variable(
                    np.array([x[1] for x in data
                              ]).astype('int64').reshape(args.batch_size, 1))

                cnn_net.train()
                avg_cost, prediction, acc = cnn_net(doc, label)
                avg_cost.backward()
                batch_time.update(time.time() - end)
                np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                word_num = np.sum(np_mask)
                sgd_optimizer.minimize(avg_cost)
                cnn_net.clear_gradients()
                total_cost.append(avg_cost.numpy() * word_num)
                total_acc.append(acc.numpy() * word_num)
                total_num_seqs.append(word_num)

                if steps % args.skip_steps == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    progress.print(batch_id + 1)
                    #print("step: %d, ave loss: %f, "
                    #      "ave acc: %f, speed: %f steps/s" %
                    #      (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                    #       np.sum(total_acc) / np.sum(total_num_seqs),
                    #       args.skip_steps / used_time))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()

                if steps % args.validation_steps == 0:
                    total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                    cnn_net.eval()
                    eval_steps = 0
                    for eval_batch_id, eval_data in enumerate(
                            eval_data_generator()):
                        eval_np_doc = np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size -
                                    len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in eval_data
                        ]).astype('int64').reshape(1, -1)
                        eval_label = to_variable(
                            np.array([x[1] for x in eval_data
                                      ]).astype('int64').reshape(
                                          args.batch_size, 1))
                        eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
                        eval_avg_cost, eval_prediction, eval_acc = cnn_net(
                            eval_doc, eval_label)

                        eval_np_mask = (eval_np_doc !=
                                        args.vocab_size).astype('int32')
                        eval_word_num = np.sum(eval_np_mask)
                        total_eval_cost.append(eval_avg_cost.numpy() *
                                               eval_word_num)
                        total_eval_acc.append(eval_acc.numpy() * eval_word_num)
                        total_eval_num_seqs.append(eval_word_num)

                        eval_steps += 1

                    time_end = time.time()
                    used_time = time_end - time_begin
                    print(
                        "Final validation result: step: %d, ave loss: %f, "
                        "ave acc: %f, speed: %f steps/s" %
                        (steps, np.sum(total_eval_cost) /
                         np.sum(total_eval_num_seqs), np.sum(total_eval_acc) /
                         np.sum(total_eval_num_seqs), eval_steps / used_time))
                    time_begin = time.time()

                # if steps % args.save_steps == 0:
                #     save_path = "save_dir_" + str(steps)
                #     print('save model to: ' + save_path)
                #     fluid.dygraph.save_persistables(cnn_net.state_dict(),
                #                                     save_path)
                end = time.time()
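This variant assumes AverageMeter and ProgressMeter timing helpers that the snippet does not define. A minimal sketch of what such helpers typically look like, in the style of the common PyTorch-examples meters; the real project ships its own versions:

class AverageMeter(object):
    """Tracks the current value and running average of a metric."""
    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.val = self.sum = self.count = self.avg = 0
    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(name=self.name, val=self.val, avg=self.avg)

class ProgressMeter(object):
    """Prints a batch counter followed by each meter's current and average value."""
    def __init__(self, num_batches, *meters, prefix=""):
        self.num_batches, self.meters, self.prefix = num_batches, meters, prefix
    def print(self, batch):
        entries = [self.prefix + '[{}/{}]'.format(batch, self.num_batches)]
        entries += [str(m) for m in self.meters]
        print('\t'.join(entries))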
Example #4
File: main.py Project: walloollaw/models
def train():
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 90
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())

        num_train_examples = processor.get_num_examples(phase="train")

        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        if not args.ce:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=True)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        else:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=False)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
                           args.padding_size)

        sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []

        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_data_generator()):
                enable_profile = steps > args.profile_steps

                with profile_context(enable_profile):

                    steps += 1
                    doc = to_variable(
                        np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size -
                                    len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in data
                        ]).astype('int64').reshape(-1, 1))

                    label = to_variable(
                        np.array([x[1] for x in data]).astype('int64').reshape(
                            args.batch_size, 1))

                    cnn_net.train()
                    avg_cost, prediction, acc = cnn_net(doc, label)
                    avg_cost.backward()
                    np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                    word_num = np.sum(np_mask)
                    sgd_optimizer.minimize(avg_cost)
                    cnn_net.clear_gradients()
                    total_cost.append(avg_cost.numpy() * word_num)
                    total_acc.append(acc.numpy() * word_num)
                    total_num_seqs.append(word_num)

                    if steps % args.skip_steps == 0:
                        time_end = time.time()
                        used_time = time_end - time_begin
                        print("step: %d, ave loss: %f, "
                              "ave acc: %f, speed: %f steps/s" %
                              (steps,
                               np.sum(total_cost) / np.sum(total_num_seqs),
                               np.sum(total_acc) / np.sum(total_num_seqs),
                               args.skip_steps / used_time))
                        total_cost, total_acc, total_num_seqs = [], [], []
                        time_begin = time.time()

                    if steps % args.validation_steps == 0:
                        total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                        cnn_net.eval()
                        eval_steps = 0
                        for eval_batch_id, eval_data in enumerate(
                                eval_data_generator()):
                            eval_np_doc = np.array([
                                np.pad(x[0][0:args.padding_size],
                                       (0, args.padding_size -
                                        len(x[0][0:args.padding_size])),
                                       'constant',
                                       constant_values=(args.vocab_size))
                                for x in eval_data
                            ]).astype('int64').reshape(1, -1)
                            eval_label = to_variable(
                                np.array([x[1] for x in eval_data
                                          ]).astype('int64').reshape(
                                              args.batch_size, 1))
                            eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
                            eval_avg_cost, eval_prediction, eval_acc = cnn_net(
                                eval_doc, eval_label)

                            eval_np_mask = (eval_np_doc !=
                                            args.vocab_size).astype('int32')
                            eval_word_num = np.sum(eval_np_mask)
                            total_eval_cost.append(eval_avg_cost.numpy() *
                                                   eval_word_num)
                            total_eval_acc.append(eval_acc.numpy() *
                                                  eval_word_num)
                            total_eval_num_seqs.append(eval_word_num)

                            eval_steps += 1

                        time_end = time.time()
                        used_time = time_end - time_begin
                        print(
                            "Final validation result: step: %d, ave loss: %f, "
                            "ave acc: %f, speed: %f steps/s" %
                            (steps, np.sum(total_eval_cost) /
                             np.sum(total_eval_num_seqs),
                             np.sum(total_eval_acc) /
                             np.sum(total_eval_num_seqs),
                             eval_steps / used_time))
                        time_begin = time.time()
                        if args.ce:
                            print("kpis\ttrain_loss\t%0.3f" %
                                  (np.sum(total_eval_cost) /
                                   np.sum(total_eval_num_seqs)))
                            print("kpis\ttrain_acc\t%0.3f" %
                                  (np.sum(total_eval_acc) /
                                   np.sum(total_eval_num_seqs)))

                    if steps % args.save_steps == 0:
                        save_path = "save_dir_" + str(steps)
                        print('save model to: ' + save_path)
                        fluid.dygraph.save_persistables(
                            cnn_net.state_dict(), save_path)
                if enable_profile:
                    print('save profile result into /tmp/profile_file')
                    return
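The profiling wrapper is assumed here rather than shown; given the '/tmp/profile_file' message above, a plausible minimal profile_context built on fluid.profiler would be:

import contextlib
from paddle.fluid import profiler

@contextlib.contextmanager
def profile_context(profile=True):
    if profile:
        # Profile all ops and write an aggregated report to /tmp/profile_file.
        with profiler.profiler('All', 'total', '/tmp/profile_file'):
            yield
    else:
        yield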
Example #5
File: main.py Project: yxl-0713/models
def train():
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 90
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())

        num_train_examples = processor.get_num_examples(phase="train")

        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        if not args.ce:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=True)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        else:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=False)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        if args.model_type == 'cnn_net':
            model = nets.CNN(args.vocab_size, args.batch_size,
                             args.padding_size)
        elif args.model_type == 'bow_net':
            model = nets.BOW(args.vocab_size, args.batch_size,
                             args.padding_size)
        elif args.model_type == 'gru_net':
            model = nets.GRU(args.vocab_size, args.batch_size,
                             args.padding_size)
        elif args.model_type == 'bigru_net':
            model = nets.BiGRU(args.vocab_size, args.batch_size,
                               args.padding_size)
        sgd_optimizer = fluid.optimizer.Adagrad(
            learning_rate=args.lr, parameter_list=model.parameters())
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        gru_hidden_data = np.zeros((args.batch_size, 128), dtype='float32')
        ce_time, ce_infor = [], []
        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_data_generator()):
                enable_profile = steps > args.profile_steps
                with profile_context(enable_profile):
                    steps += 1
                    doc = to_variable(
                        np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size -
                                    len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in data
                        ]).astype('int64').reshape(-1))
                    label = to_variable(
                        np.array([x[1] for x in data]).astype('int64').reshape(
                            args.batch_size, 1))
                    model.train()
                    avg_cost, prediction, acc = model(doc, label)
                    avg_cost.backward()
                    np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                    word_num = np.sum(np_mask)
                    sgd_optimizer.minimize(avg_cost)
                    model.clear_gradients()
                    total_cost.append(avg_cost.numpy() * word_num)
                    total_acc.append(acc.numpy() * word_num)
                    total_num_seqs.append(word_num)

                    if steps % args.skip_steps == 0:
                        time_end = time.time()
                        used_time = time_end - time_begin
                        print("step: %d, ave loss: %f, "
                              "ave acc: %f, speed: %f steps/s" %
                              (steps,
                               np.sum(total_cost) / np.sum(total_num_seqs),
                               np.sum(total_acc) / np.sum(total_num_seqs),
                               args.skip_steps / used_time))
                        ce_time.append(used_time)
                        ce_infor.append(
                            np.sum(total_acc) / np.sum(total_num_seqs))
                        total_cost, total_acc, total_num_seqs = [], [], []
                        time_begin = time.time()

                    if steps % args.validation_steps == 0:
                        total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                        model.eval()
                        eval_steps = 0
                        gru_hidden_data = np.zeros((args.batch_size, 128),
                                                   dtype='float32')
                        for eval_batch_id, eval_data in enumerate(
                                eval_data_generator()):
                            eval_np_doc = np.array([
                                np.pad(x[0][0:args.padding_size],
                                       (0, args.padding_size -
                                        len(x[0][0:args.padding_size])),
                                       'constant',
                                       constant_values=(args.vocab_size))
                                for x in eval_data
                            ]).astype('int64').reshape(-1)
                            eval_label = to_variable(
                                np.array([x[1] for x in eval_data
                                          ]).astype('int64').reshape(
                                              args.batch_size, 1))
                            eval_doc = to_variable(eval_np_doc)
                            eval_avg_cost, eval_prediction, eval_acc = model(
                                eval_doc, eval_label)
                            eval_np_mask = (eval_np_doc !=
                                            args.vocab_size).astype('int32')
                            eval_word_num = np.sum(eval_np_mask)
                            total_eval_cost.append(eval_avg_cost.numpy() *
                                                   eval_word_num)
                            total_eval_acc.append(eval_acc.numpy() *
                                                  eval_word_num)
                            total_eval_num_seqs.append(eval_word_num)

                            eval_steps += 1

                        time_end = time.time()
                        used_time = time_end - time_begin
                        print(
                            "Final validation result: step: %d, ave loss: %f, "
                            "ave acc: %f, speed: %f steps/s" %
                            (steps, np.sum(total_eval_cost) /
                             np.sum(total_eval_num_seqs),
                             np.sum(total_eval_acc) /
                             np.sum(total_eval_num_seqs),
                             eval_steps / used_time))
                        time_begin = time.time()
                        if args.ce:
                            print("kpis\ttrain_loss\t%0.3f" %
                                  (np.sum(total_eval_cost) /
                                   np.sum(total_eval_num_seqs)))
                            print("kpis\ttrain_acc\t%0.3f" %
                                  (np.sum(total_eval_acc) /
                                   np.sum(total_eval_num_seqs)))

                    if steps % args.save_steps == 0:
                        save_path = args.checkpoints + "/" + "save_dir_" + str(
                            steps)
                        print('save model to: ' + save_path)
                        fluid.dygraph.save_dygraph(model.state_dict(),
                                                   save_path)
                if enable_profile:
                    print('save profile result into /tmp/profile_file')
                    return
        if args.ce:
            card_num = get_cards()
            _acc = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _acc = ce_infor[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_acc_card%s\t%f" % (card_num, _acc))
Example #6
def train():
    # with fluid.dygraph.guard(place):
    with fluid.dygraph.guard():
        if args.ce:
            print("ce mode")
            seed = args.random_seed
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())

        if not args.ce:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=True)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        else:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=False)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)

        model = nets.CNN(args.vocab_size)

        # save the initial parameters to a file
        param_dict = {}
        for param_name in model.state_dict():

            param_dict[param_name] = model.state_dict()[param_name].numpy()
            if 'embedding' in param_name:
                state_dict = model.state_dict()
                # Zero row 0 of the embedding (the pad id used below) before snapshotting.
                param_dict[param_name][0] = 0
                state_dict[param_name] = paddle.to_tensor(
                    param_dict[param_name])
                model.set_dict(state_dict)
                # print(param_dict[param_name][0])
        np.savez('./parameters.npz', **param_dict)
        for parameters in model.named_parameters():
            print(parameters[0])
            if 'embedding' in parameters[0]:
                print(model.state_dict()[parameters[0]][0].shape)

        # sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr,parameter_list=model.parameters())
        sgd_optimizer = paddle.fluid.optimizer.SGD(
            learning_rate=args.lr, parameter_list=model.parameters())
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        gru_hidden_data = np.zeros((args.batch_size, 128), dtype='float64')
        ce_time, ce_infor = [], []
        reader_time = 0.0

        num_train_examples = processor.get_num_examples(phase="train")

        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_data_generator()):
                reader_begin = time.time()
                seq_len_arr = np.array([len(x[0]) for x in data],
                                       dtype="int64")
                steps += 1
                seq_len = paddle.to_tensor(seq_len_arr)
                doc = paddle.to_tensor(
                    np.array([
                        np.pad(x[0][0:args.padding_size],
                               (0, args.padding_size -
                                len(x[0][0:args.padding_size])),
                               'constant',
                               constant_values=0) for x in data
                    ]).astype('int64'))

                label = paddle.to_tensor(
                    np.array([x[1] for x in data
                              ]).astype('int64').reshape(args.batch_size, 1))

                reader_end = time.time()
                reader_time += (reader_end - reader_begin)
                model.train()

                avg_cost, prediction, acc = model(doc, seq_len,
                                                  args.padding_size, label)
                model.clear_gradients()
                avg_cost.backward()

                sgd_optimizer.minimize(avg_cost)

                # np_mask = (doc.numpy() != 0).astype('int32')
                # word_num = np.sum(np_mask)
                word_num = np.sum(seq_len_arr)

                total_cost.append(avg_cost.numpy() * word_num)
                total_acc.append(acc.numpy() * word_num)
                total_num_seqs.append(word_num)

                if steps % args.skip_steps == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin

                    print(
                        "step: %d, ave loss: %f, "
                        "ave acc: %f, speed: %f steps/s, reader speed: %f steps/s"
                        % (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                           np.sum(total_acc) / np.sum(total_num_seqs),
                           args.skip_steps / used_time,
                           args.skip_steps / reader_time))
                    reader_time = 0.0
                    ce_time.append(used_time)
                    ce_infor.append(np.sum(total_acc) / np.sum(total_num_seqs))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()

                    # if steps % args.validation_steps == 0:
                    #     total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                    #     model.eval()
                    #     eval_steps = 0
                    #     gru_hidden_data = np.zeros((args.batch_size, 128), dtype='float64')
                    #     for eval_batch_id, eval_data in enumerate(
                    #             eval_data_generator()):
                    #         eval_seq_arr = np.array([len(x[0]) for x in data], dtype="int64")
                    #         eval_seq_len = to_variable(eval_seq_arr)
                    #         eval_np_doc = np.array([
                    #             np.pad(x[0][0:args.padding_size],
                    #                    (0, args.padding_size -
                    #                     len(x[0][0:args.padding_size])),
                    #                    'constant',
                    #                    constant_values=0) # args.vocab_size))
                    #             for x in eval_data
                    #         ]).astype('int64')# .reshape(-1)
                    #         eval_label = to_variable(
                    #             np.array([x[1] for x in eval_data]).astype(
                    #                 'int64').reshape(args.batch_size, 1))
                    #         eval_doc = to_variable(eval_np_doc)
                    #         eval_avg_cost, eval_prediction, eval_acc = model(
                    #             eval_doc, eval_seq_len, args.padding_size, eval_label)
                    #         eval_np_mask = (
                    #             eval_np_doc != 0).astype('int32')
                    #             # eval_np_doc != args.vocab_size).astype('int32')
                    #         # eval_word_num = np.sum(eval_np_mask)
                    #         eval_word_num = np.sum(eval_seq_arr)
                    #         total_eval_cost.append(eval_avg_cost.numpy() *
                    #                                eval_word_num)
                    #         total_eval_acc.append(eval_acc.numpy() *
                    #                               eval_word_num)
                    #         total_eval_num_seqs.append(eval_word_num)

                    #         eval_steps += 1

                    #     time_end = time.time()
                    #     used_time = time_end - time_begin
                    #     print(
                    #         "Final validation result: step: %d, ave loss: %f, "
                    #         "ave acc: %f, speed: %f steps/s" %
                    #         (steps, np.sum(total_eval_cost) /
                    #          np.sum(total_eval_num_seqs), np.sum(total_eval_acc)
                    #          / np.sum(total_eval_num_seqs),
                    #          eval_steps / used_time))
                    #     time_begin = time.time()
                    #     if args.ce:
                    #         print("kpis\ttrain_loss\t%0.3f" %
                    #               (np.sum(total_eval_cost) /
                    #                np.sum(total_eval_num_seqs)))
                    #         print("kpis\ttrain_acc\t%0.3f" %
                    #               (np.sum(total_eval_acc) /
                    #                np.sum(total_eval_num_seqs)))

                    # if steps % args.save_steps == 0:
                    #     save_path = args.checkpoints+"/"+"save_dir_" + str(steps)
                    #     print('save model to: ' + save_path)
                    #     fluid.dygraph.save_dygraph(model.state_dict(),
                    #                                save_path)
                    # fluid.dygraph.save_dygraph(model.state_dict(),
                    #  save_path)
        if args.ce:
            card_num = get_cards()
            _acc = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _acc = ce_infor[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_acc_card%s\t%f" % (card_num, _acc))