Example 1
def infer():
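    """Dygraph inference: pad each batch to padding_size, load the checkpoint,
    and report token-weighted average accuracy and throughput."""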
    with fluid.dygraph.guard(place):
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)

        infer_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='infer',
            epoch=args.epoch,
            shuffle=False)
        if args.model_type == 'cnn_net':
            model_infer = nets.CNN(args.vocab_size, args.batch_size,
                                   args.padding_size)
        elif args.model_type == 'bow_net':
            model_infer = nets.BOW(args.vocab_size, args.batch_size,
                                   args.padding_size)
        elif args.model_type == 'gru_net':
            model_infer = nets.GRU(args.vocab_size, args.batch_size,
                                   args.padding_size)
        elif args.model_type == 'bigru_net':
            model_infer = nets.BiGRU(args.vocab_size, args.batch_size,
                                     args.padding_size)
        print('Running inference ...')
        restore, _ = fluid.load_dygraph(args.checkpoints)
        model_infer.set_dict(restore)
        model_infer.eval()
        total_acc, total_num_seqs = [], []
        steps = 0
        time_begin = time.time()
        for batch_id, data in enumerate(infer_data_generator()):
            steps += 1
            np_doc = np.array([
                np.pad(x[0][0:args.padding_size],
                       (0, args.padding_size - len(x[0][0:args.padding_size])),
                       'constant',
                       constant_values=(args.vocab_size)) for x in data
            ]).astype('int64').reshape(-1)
            doc = to_variable(np_doc)
            label = to_variable(
                np.array([x[1] for x in data
                          ]).astype('int64').reshape(args.batch_size, 1))
            _, _, acc = model_infer(doc, label)
            mask = (np_doc != args.vocab_size).astype('int32')
            word_num = np.sum(mask)
            total_acc.append(acc.numpy() * word_num)
            total_num_seqs.append(word_num)
        time_end = time.time()
        used_time = time_end - time_begin
        print("Final infer result: ave acc: %f, speed: %f steps/s" %
              (np.sum(total_acc) / np.sum(total_num_seqs), steps / used_time))
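The pad-and-truncate idiom above recurs in every example that follows: each token-id list is cut to padding_size and right-padded with vocab_size, an out-of-vocabulary id that the accuracy mask later filters out. A minimal, self-contained sketch of the same idiom (pad_batch and its arguments are illustrative names, not from the original module):

import numpy as np

def pad_batch(batch_docs, padding_size, pad_id):
    """Truncate each token-id list to padding_size, then right-pad with pad_id."""
    padded = [
        np.pad(doc[:padding_size],
               (0, padding_size - len(doc[:padding_size])),
               'constant', constant_values=pad_id)
        for doc in batch_docs
    ]
    return np.array(padded, dtype='int64')

# Two documents, padding_size=5, pad_id=100:
batch = pad_batch([[1, 2, 3], [4, 5, 6, 7, 8, 9]], padding_size=5, pad_id=100)
# batch -> [[1, 2, 3, 100, 100], [4, 5, 6, 7, 8]]
mask = (batch != 100).astype('int32')  # 1 for real tokens, 0 for padding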
Example 2
def test_inference_model(args):
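    """Set up an executor and prediction program, load a saved static-graph
    inference model from args.inference_model_dir, and run the 'infer' phase."""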
    if args.use_cuda:
        dev_count = fluid.core.get_cuda_device_count()
        place = fluid.CUDAPlace(0)
    else:
        dev_count = int(os.environ.get('CPU_NUM', 1))
        place = fluid.CPUPlace()
    
    exe = fluid.Executor(place)
    test_prog = fluid.Program()
    startup_prog = fluid.Program()

    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            infer_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='infer_reader',
                num_labels=args.num_labels,
                is_prediction=True)

    test_prog = test_prog.clone(for_test=True)
    exe.run(startup_prog)  # the Executor was already created above

    processor = reader.SentaProcessor(data_dir=args.data_dir,
        vocab_path=args.vocab_path,
        random_seed=args.random_seed,
        max_seq_len=args.max_seq_len)

    num_labels = len(processor.get_labels())

    assert args.inference_model_dir, "args.inference_model_dir must be set"
    infer_program, feed_names, fetch_targets = fluid.io.load_inference_model(
        dirname=args.inference_model_dir,
        executor=exe,
        model_filename="model.pdmodel",
        params_filename="params.pdparams")

    infer_data_generator = processor.data_generator(
        batch_size=args.batch_size // dev_count,  # integer division; '/' would produce a float batch size in Python 3
        phase="infer",
        epoch=1,
        shuffle=False)
    
    infer_pyreader.set_sample_list_generator(infer_data_generator)
    inference(exe, test_prog, infer_pyreader,
        [probs.name], "infer")
Example 3
def train():
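    """Dygraph training loop for the CNN sentiment model, with throughput
    meters, periodic logging, and dev-set evaluation every validation_steps."""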
    with fluid.dygraph.guard(place):
        if args.benchmark:
            args.epoch = 1
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())

        num_train_examples = processor.get_num_examples(phase="train")

        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        train_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='train',
            epoch=args.epoch,
            shuffle=True)

        eval_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='dev',
            epoch=args.epoch,
            shuffle=False)

        cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
                           args.padding_size)

        sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        length = len(list(train_data_generator()))  # drains one full pass just to count batches for the progress meter
        for eop in range(args.epoch):
            time_begin = time.time()
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(length,
                                     batch_time,
                                     data_time,
                                     prefix="epoch: [{}]".format(eop))
            end = time.time()
            for batch_id, data in enumerate(train_data_generator()):
                data_time.update(time.time() - end)
                steps += 1
                doc = to_variable(
                    np.array([
                        np.pad(x[0][0:args.padding_size],
                               (0, args.padding_size -
                                len(x[0][0:args.padding_size])),
                               'constant',
                               constant_values=(args.vocab_size)) for x in data
                    ]).astype('int64').reshape(-1, 1))

                label = to_variable(
                    np.array([x[1] for x in data
                              ]).astype('int64').reshape(args.batch_size, 1))

                cnn_net.train()
                avg_cost, prediction, acc = cnn_net(doc, label)
                avg_cost.backward()
                batch_time.update(time.time() - end)
                np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                word_num = np.sum(np_mask)
                sgd_optimizer.minimize(avg_cost)
                cnn_net.clear_gradients()
                total_cost.append(avg_cost.numpy() * word_num)
                total_acc.append(acc.numpy() * word_num)
                total_num_seqs.append(word_num)

                if steps % args.skip_steps == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    progress.print(batch_id + 1)
                    #print("step: %d, ave loss: %f, "
                    #      "ave acc: %f, speed: %f steps/s" %
                    #      (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                    #       np.sum(total_acc) / np.sum(total_num_seqs),
                    #       args.skip_steps / used_time))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()

                if steps % args.validation_steps == 0:
                    total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                    cnn_net.eval()
                    eval_steps = 0
                    for eval_batch_id, eval_data in enumerate(
                            eval_data_generator()):
                        eval_np_doc = np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size -
                                    len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in eval_data
                        ]).astype('int64').reshape(1, -1)
                        eval_label = to_variable(
                            np.array([x[1] for x in eval_data
                                      ]).astype('int64').reshape(
                                          args.batch_size, 1))
                        eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
                        eval_avg_cost, eval_prediction, eval_acc = cnn_net(
                            eval_doc, eval_label)

                        eval_np_mask = (eval_np_doc !=
                                        args.vocab_size).astype('int32')
                        eval_word_num = np.sum(eval_np_mask)
                        total_eval_cost.append(eval_avg_cost.numpy() *
                                               eval_word_num)
                        total_eval_acc.append(eval_acc.numpy() * eval_word_num)
                        total_eval_num_seqs.append(eval_word_num)

                        eval_steps += 1

                    time_end = time.time()
                    used_time = time_end - time_begin
                    print(
                        "Final validation result: step: %d, ave loss: %f, "
                        "ave acc: %f, speed: %f steps/s" %
                        (steps, np.sum(total_eval_cost) /
                         np.sum(total_eval_num_seqs), np.sum(total_eval_acc) /
                         np.sum(total_eval_num_seqs), eval_steps / used_time))
                    time_begin = time.time()

                # if steps % args.save_steps == 0:
                #     save_path = "save_dir_" + str(steps)
                #     print('save model to: ' + save_path)
                #     fluid.dygraph.save_persistables(cnn_net.state_dict(),
                #                                     save_path)
                end = time.time()
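AverageMeter and ProgressMeter are helpers defined outside this snippet. A plausible minimal version, modeled on the common PyTorch-examples pattern, is sketched below (an assumption about their shape, not the original definitions):

class AverageMeter(object):
    """Tracks the latest value and running average of a metric."""
    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.val = self.sum = self.count = self.avg = 0
    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
    def __str__(self):
        return ('{name} {val' + self.fmt + '} ({avg' + self.fmt + '})').format(**self.__dict__)

class ProgressMeter(object):
    """Prints a prefixed progress line with all registered meters."""
    def __init__(self, num_batches, *meters, prefix=""):
        self.num_batches, self.meters, self.prefix = num_batches, meters, prefix
    def print(self, batch):
        print('\t'.join([self.prefix + ' [%d/%d]' % (batch, self.num_batches)] +
                        [str(m) for m in self.meters]))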
Example 4
def main(args):
    """
    Main Function
    """
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = 1
    exe = fluid.Executor(place)

    task_name = args.task_name.lower()
    processor = reader.SentaProcessor(data_dir=args.data_dir,
                                      vocab_path=args.vocab_path,
                                      random_seed=args.random_seed,
                                      max_seq_len=args.max_seq_len)
    num_labels = len(processor.get_labels())

    if not (args.do_train or args.do_val or args.do_infer):
        raise ValueError("For args `do_train`, `do_val` and `do_infer`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = processor.data_generator(
            batch_size=args.batch_size // dev_count,  # integer per-device batch size
            phase='train',
            epoch=args.epoch,
            shuffle=True)

        num_train_examples = processor.get_num_examples(phase="train")

        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        train_program = fluid.Program()
        if args.enable_ce and args.random_seed is not None:
            train_program.random_seed = args.random_seed

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_reader, loss, accuracy, num_seqs = create_model(
                    args,
                    pyreader_name='train_reader',
                    num_labels=num_labels,
                    is_prediction=False)

                sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
                sgd_optimizer.minimize(loss)

        if args.verbose:
            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val:
        test_data_generator = processor.data_generator(
            batch_size=args.batch_size // dev_count,  # integer per-device batch size
            phase='dev',
            epoch=1,
            shuffle=False)
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_reader, loss, accuracy, num_seqs = create_model(
                    args,
                    pyreader_name='test_reader',
                    num_labels=num_labels,
                    is_prediction=False)

        test_prog = test_prog.clone(for_test=True)

    if args.do_infer:
        infer_data_generator = processor.data_generator(
            batch_size=args.batch_size // dev_count,  # integer per-device batch size
            phase='infer',
            epoch=1,
            shuffle=False)
        infer_prog = fluid.Program()
        with fluid.program_guard(infer_prog, startup_prog):
            with fluid.unique_name.guard():
                infer_reader, probs, _ = create_model(
                    args,
                    pyreader_name='infer_reader',
                    num_labels=num_labels,
                    is_prediction=True)
        infer_prog = infer_prog.clone(for_test=True)

    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog)

    elif args.do_val or args.do_infer:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(exe, args.init_checkpoint, main_program=startup_prog)

    if args.do_train:
        train_exe = exe
        train_reader.set_sample_list_generator(train_data_generator)
    else:
        train_exe = None
    if args.do_val:
        test_exe = exe
        test_reader.set_sample_list_generator(test_data_generator)
    if args.do_infer:
        test_exe = exe
        infer_reader.set_sample_list_generator(infer_data_generator)

    if args.do_train:
        train_reader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        time_begin = time.time()
        while True:
            try:
                steps += 1
                #print("steps...")
                if steps % args.skip_steps == 0:
                    fetch_list = [loss.name, accuracy.name, num_seqs.name]
                else:
                    fetch_list = []

                outputs = train_exe.run(program=train_program,
                                        fetch_list=fetch_list,
                                        return_numpy=False)
                #print("finished one step")
                if steps % args.skip_steps == 0:
                    np_loss, np_acc, np_num_seqs = outputs
                    np_loss = np.array(np_loss)
                    np_acc = np.array(np_acc)
                    np_num_seqs = np.array(np_num_seqs)
                    total_cost.extend(np_loss * np_num_seqs)
                    total_acc.extend(np_acc * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_reader.queue.size()
                        print(verbose)

                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("step: %d, ave loss: %f, "
                          "ave acc: %f, speed: %f steps/s" %
                          (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                           np.sum(total_acc) / np.sum(total_num_seqs),
                           args.skip_steps / used_time))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps),
                                             "checkpoint")
                    fluid.save(train_program, save_path)

                if steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        print("do evalatation")
                        evaluate(exe, test_prog, test_reader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "dev")

            except fluid.core.EOFException:
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps), "checkpoint")
                fluid.save(train_program, save_path)
                train_reader.reset()
                break

    # final eval on dev set
    if args.do_val:
        print("Final validation result:")
        evaluate(exe, test_prog, test_reader,
                 [loss.name, accuracy.name, num_seqs.name], "dev")

    # final eval on test set
    if args.do_infer:
        print("Final test result:")
        inference(exe, infer_prog, infer_reader, [prop.name], "infer")
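Every reporting block in these examples computes the same weighted average: per-batch loss and accuracy are multiplied by the batch's sequence (or token) count before summing, so larger batches count proportionally. The pattern in isolation (a standalone illustration, not code from the original module):

import numpy as np

def weighted_mean(values, weights):
    """Average per-batch metrics, weighting each batch by its sample count."""
    values, weights = np.asarray(values), np.asarray(weights)
    return np.sum(values * weights) / np.sum(weights)

# Batch accuracies 0.5 and 1.0 over 10 and 30 samples -> 0.875
print(weighted_mean([0.5, 1.0], [10, 30]))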
Example 5
def train():
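    """Dygraph CNN training with an optional deterministic CE (continuous
    evaluation) mode and step-triggered profiling."""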
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 90
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())

        num_train_examples = processor.get_num_examples(phase="train")

        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        if not args.ce:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=True)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        else:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=False)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
                           args.padding_size)

        sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []

        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_data_generator()):
                enable_profile = steps > args.profile_steps

                with profile_context(enable_profile):

                    steps += 1
                    doc = to_variable(
                        np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size -
                                    len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in data
                        ]).astype('int64').reshape(-1, 1))

                    label = to_variable(
                        np.array([x[1] for x in data]).astype('int64').reshape(
                            args.batch_size, 1))

                    cnn_net.train()
                    avg_cost, prediction, acc = cnn_net(doc, label)
                    avg_cost.backward()
                    np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                    word_num = np.sum(np_mask)
                    sgd_optimizer.minimize(avg_cost)
                    cnn_net.clear_gradients()
                    total_cost.append(avg_cost.numpy() * word_num)
                    total_acc.append(acc.numpy() * word_num)
                    total_num_seqs.append(word_num)

                    if steps % args.skip_steps == 0:
                        time_end = time.time()
                        used_time = time_end - time_begin
                        print("step: %d, ave loss: %f, "
                              "ave acc: %f, speed: %f steps/s" %
                              (steps,
                               np.sum(total_cost) / np.sum(total_num_seqs),
                               np.sum(total_acc) / np.sum(total_num_seqs),
                               args.skip_steps / used_time))
                        total_cost, total_acc, total_num_seqs = [], [], []
                        time_begin = time.time()

                    if steps % args.validation_steps == 0:
                        total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                        cnn_net.eval()
                        eval_steps = 0
                        for eval_batch_id, eval_data in enumerate(
                                eval_data_generator()):
                            eval_np_doc = np.array([
                                np.pad(x[0][0:args.padding_size],
                                       (0, args.padding_size -
                                        len(x[0][0:args.padding_size])),
                                       'constant',
                                       constant_values=(args.vocab_size))
                                for x in eval_data
                            ]).astype('int64').reshape(1, -1)
                            eval_label = to_variable(
                                np.array([x[1] for x in eval_data
                                          ]).astype('int64').reshape(
                                              args.batch_size, 1))
                            eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
                            eval_avg_cost, eval_prediction, eval_acc = cnn_net(
                                eval_doc, eval_label)

                            eval_np_mask = (eval_np_doc !=
                                            args.vocab_size).astype('int32')
                            eval_word_num = np.sum(eval_np_mask)
                            total_eval_cost.append(eval_avg_cost.numpy() *
                                                   eval_word_num)
                            total_eval_acc.append(eval_acc.numpy() *
                                                  eval_word_num)
                            total_eval_num_seqs.append(eval_word_num)

                            eval_steps += 1

                        time_end = time.time()
                        used_time = time_end - time_begin
                        print(
                            "Final validation result: step: %d, ave loss: %f, "
                            "ave acc: %f, speed: %f steps/s" %
                            (steps, np.sum(total_eval_cost) /
                             np.sum(total_eval_num_seqs),
                             np.sum(total_eval_acc) /
                             np.sum(total_eval_num_seqs),
                             eval_steps / used_time))
                        time_begin = time.time()
                        if args.ce:
                            print("kpis\ttrain_loss\t%0.3f" %
                                  (np.sum(total_eval_cost) /
                                   np.sum(total_eval_num_seqs)))
                            print("kpis\ttrain_acc\t%0.3f" %
                                  (np.sum(total_eval_acc) /
                                   np.sum(total_eval_num_seqs)))

                    if steps % args.save_steps == 0:
                        save_path = "save_dir_" + str(steps)
                        print('save model to: ' + save_path)
                        fluid.dygraph.save_persistables(
                            cnn_net.state_dict(), save_path)
                if enable_profile:
                    print('save profile result into /tmp/profile_file')
                    return
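profile_context used above is defined elsewhere in the original script. Given the '/tmp/profile_file' path in the print, a likely minimal shape is the sketch below, written against the fluid.profiler context-manager API; treat it as a reconstruction, not the original:

import contextlib
import paddle.fluid.profiler as profiler

@contextlib.contextmanager
def profile_context(profile=True):
    # Collect a full CPU+GPU profile and dump it to /tmp/profile_file when enabled.
    if profile:
        with profiler.profiler('All', 'total', '/tmp/profile_file'):
            yield
    else:
        yield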
Example 6
def train():
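    """Dygraph training that selects CNN/BOW/GRU/BiGRU from args.model_type,
    logs CE KPIs, and saves dygraph checkpoints every save_steps."""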
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 90
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())

        num_train_examples = processor.get_num_examples(phase="train")

        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        if not args.ce:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=True)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        else:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=False)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        if args.model_type == 'cnn_net':
            model = nets.CNN(args.vocab_size, args.batch_size,
                             args.padding_size)
        elif args.model_type == 'bow_net':
            model = nets.BOW(args.vocab_size, args.batch_size,
                             args.padding_size)
        elif args.model_type == 'gru_net':
            model = nets.GRU(args.vocab_size, args.batch_size,
                             args.padding_size)
        elif args.model_type == 'bigru_net':
            model = nets.BiGRU(args.vocab_size, args.batch_size,
                               args.padding_size)
        sgd_optimizer = fluid.optimizer.Adagrad(
            learning_rate=args.lr, parameter_list=model.parameters())
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        gru_hidden_data = np.zeros((args.batch_size, 128), dtype='float32')
        ce_time, ce_infor = [], []
        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_data_generator()):
                enable_profile = steps > args.profile_steps
                with profile_context(enable_profile):
                    steps += 1
                    doc = to_variable(
                        np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size -
                                    len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in data
                        ]).astype('int64').reshape(-1))
                    label = to_variable(
                        np.array([x[1] for x in data]).astype('int64').reshape(
                            args.batch_size, 1))
                    model.train()
                    avg_cost, prediction, acc = model(doc, label)
                    avg_cost.backward()
                    np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                    word_num = np.sum(np_mask)
                    sgd_optimizer.minimize(avg_cost)
                    model.clear_gradients()
                    total_cost.append(avg_cost.numpy() * word_num)
                    total_acc.append(acc.numpy() * word_num)
                    total_num_seqs.append(word_num)

                    if steps % args.skip_steps == 0:
                        time_end = time.time()
                        used_time = time_end - time_begin
                        print("step: %d, ave loss: %f, "
                              "ave acc: %f, speed: %f steps/s" %
                              (steps,
                               np.sum(total_cost) / np.sum(total_num_seqs),
                               np.sum(total_acc) / np.sum(total_num_seqs),
                               args.skip_steps / used_time))
                        ce_time.append(used_time)
                        ce_infor.append(
                            np.sum(total_acc) / np.sum(total_num_seqs))
                        total_cost, total_acc, total_num_seqs = [], [], []
                        time_begin = time.time()

                    if steps % args.validation_steps == 0:
                        total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                        model.eval()
                        eval_steps = 0
                        gru_hidden_data = np.zeros((args.batch_size, 128),
                                                   dtype='float32')
                        for eval_batch_id, eval_data in enumerate(
                                eval_data_generator()):
                            eval_np_doc = np.array([
                                np.pad(x[0][0:args.padding_size],
                                       (0, args.padding_size -
                                        len(x[0][0:args.padding_size])),
                                       'constant',
                                       constant_values=(args.vocab_size))
                                for x in eval_data
                            ]).astype('int64').reshape(-1)
                            eval_label = to_variable(
                                np.array([x[1] for x in eval_data
                                          ]).astype('int64').reshape(
                                              args.batch_size, 1))
                            eval_doc = to_variable(eval_np_doc)
                            eval_avg_cost, eval_prediction, eval_acc = model(
                                eval_doc, eval_label)
                            eval_np_mask = (eval_np_doc !=
                                            args.vocab_size).astype('int32')
                            eval_word_num = np.sum(eval_np_mask)
                            total_eval_cost.append(eval_avg_cost.numpy() *
                                                   eval_word_num)
                            total_eval_acc.append(eval_acc.numpy() *
                                                  eval_word_num)
                            total_eval_num_seqs.append(eval_word_num)

                            eval_steps += 1

                        time_end = time.time()
                        used_time = time_end - time_begin
                        print(
                            "Final validation result: step: %d, ave loss: %f, "
                            "ave acc: %f, speed: %f steps/s" %
                            (steps, np.sum(total_eval_cost) /
                             np.sum(total_eval_num_seqs),
                             np.sum(total_eval_acc) /
                             np.sum(total_eval_num_seqs),
                             eval_steps / used_time))
                        time_begin = time.time()
                        if args.ce:
                            print("kpis\ttrain_loss\t%0.3f" %
                                  (np.sum(total_eval_cost) /
                                   np.sum(total_eval_num_seqs)))
                            print("kpis\ttrain_acc\t%0.3f" %
                                  (np.sum(total_eval_acc) /
                                   np.sum(total_eval_num_seqs)))

                    if steps % args.save_steps == 0:
                        save_path = args.checkpoints + "/" + "save_dir_" + str(
                            steps)
                        print('save model to: ' + save_path)
                        fluid.dygraph.save_dygraph(model.state_dict(),
                                                   save_path)
                if enable_profile:
                    print('save profile result into /tmp/profile_file')
                    return
        if args.ce:
            card_num = get_cards()
            _acc = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _acc = ce_infor[-1]
            except IndexError:  # no steps were logged
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_acc_card%s\t%f" % (card_num, _acc))
Example 7
def train():
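    """Dygraph training with the paddle.to_tensor API: pads documents with 0,
    passes explicit sequence lengths, and reports reader throughput."""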
    # with fluid.dygraph.guard(place):
    with fluid.dygraph.guard():
        if args.ce:
            print("ce mode")
            seed = args.random_seed
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())

        if not args.ce:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=True)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)
        else:
            train_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='train',
                epoch=args.epoch,
                shuffle=False)

            eval_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=args.epoch,
                shuffle=False)

        model = nets.CNN(args.vocab_size)

        # Save the initial parameters to a file, zeroing row 0 of the
        # embedding (the padding id) before training starts.
        param_dict = {}
        for param_name in model.state_dict():
            param_dict[param_name] = model.state_dict()[param_name].numpy()
            if 'embedding' in param_name:
                state_dict = model.state_dict()
                param_dict[param_name][0] = 0
                state_dict[param_name] = paddle.to_tensor(
                    param_dict[param_name])
                model.set_dict(state_dict)
        np.savez('./parameters.npz', **param_dict)
        for parameters in model.named_parameters():
            print(parameters[0])
            if 'embedding' in parameters[0]:
                print(model.state_dict()[parameters[0]][0].shape)

        # sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr,parameter_list=model.parameters())
        sgd_optimizer = paddle.fluid.optimizer.SGD(
            learning_rate=args.lr, parameter_list=model.parameters())
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        gru_hidden_data = np.zeros((args.batch_size, 128), dtype='float64')
        ce_time, ce_infor = [], []
        reader_time = 0.0

        num_train_examples = processor.get_num_examples(phase="train")

        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_data_generator()):
                reader_begin = time.time()
                seq_len_arr = np.array([len(x[0]) for x in data],
                                       dtype="int64")
                steps += 1
                seq_len = paddle.to_tensor(seq_len_arr)
                doc = paddle.to_tensor(
                    np.array([
                        np.pad(x[0][0:args.padding_size],
                               (0, args.padding_size -
                                len(x[0][0:args.padding_size])),
                               'constant',
                               constant_values=0) for x in data
                    ]).astype('int64'))

                label = paddle.to_tensor(
                    np.array([x[1] for x in data
                              ]).astype('int64').reshape(args.batch_size, 1))

                reader_end = time.time()
                reader_time += (reader_end - reader_begin)
                model.train()

                avg_cost, prediction, acc = model(doc, seq_len,
                                                  args.padding_size, label)
                model.clear_gradients()
                avg_cost.backward()

                sgd_optimizer.minimize(avg_cost)

                # np_mask = (doc.numpy() != 0).astype('int32')
                # word_num = np.sum(np_mask)
                word_num = np.sum(seq_len_arr)

                total_cost.append(avg_cost.numpy() * word_num)
                total_acc.append(acc.numpy() * word_num)
                total_num_seqs.append(word_num)

                if steps % args.skip_steps == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin

                    print(
                        "step: %d, ave loss: %f, "
                        "ave acc: %f, speed: %f steps/s, reader speed: %f steps/s"
                        % (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                           np.sum(total_acc) / np.sum(total_num_seqs),
                           args.skip_steps / used_time,
                           args.skip_steps / reader_time))
                    reader_time = 0.0
                    ce_time.append(used_time)
                    ce_infor.append(np.sum(total_acc) / np.sum(total_num_seqs))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()

                    # if steps % args.validation_steps == 0:
                    #     total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                    #     model.eval()
                    #     eval_steps = 0
                    #     gru_hidden_data = np.zeros((args.batch_size, 128), dtype='float64')
                    #     for eval_batch_id, eval_data in enumerate(
                    #             eval_data_generator()):
                    #         eval_seq_arr = np.array([len(x[0]) for x in data], dtype="int64")
                    #         eval_seq_len = to_variable(eval_seq_arr)
                    #         eval_np_doc = np.array([
                    #             np.pad(x[0][0:args.padding_size],
                    #                    (0, args.padding_size -
                    #                     len(x[0][0:args.padding_size])),
                    #                    'constant',
                    #                    constant_values=0) # args.vocab_size))
                    #             for x in eval_data
                    #         ]).astype('int64')# .reshape(-1)
                    #         eval_label = to_variable(
                    #             np.array([x[1] for x in eval_data]).astype(
                    #                 'int64').reshape(args.batch_size, 1))
                    #         eval_doc = to_variable(eval_np_doc)
                    #         eval_avg_cost, eval_prediction, eval_acc = model(
                    #             eval_doc, eval_seq_len, args.padding_size, eval_label)
                    #         eval_np_mask = (
                    #             eval_np_doc != 0).astype('int32')
                    #             # eval_np_doc != args.vocab_size).astype('int32')
                    #         # eval_word_num = np.sum(eval_np_mask)
                    #         eval_word_num = np.sum(eval_seq_arr)
                    #         total_eval_cost.append(eval_avg_cost.numpy() *
                    #                                eval_word_num)
                    #         total_eval_acc.append(eval_acc.numpy() *
                    #                               eval_word_num)
                    #         total_eval_num_seqs.append(eval_word_num)

                    #         eval_steps += 1

                    #     time_end = time.time()
                    #     used_time = time_end - time_begin
                    #     print(
                    #         "Final validation result: step: %d, ave loss: %f, "
                    #         "ave acc: %f, speed: %f steps/s" %
                    #         (steps, np.sum(total_eval_cost) /
                    #          np.sum(total_eval_num_seqs), np.sum(total_eval_acc)
                    #          / np.sum(total_eval_num_seqs),
                    #          eval_steps / used_time))
                    #     time_begin = time.time()
                    #     if args.ce:
                    #         print("kpis\ttrain_loss\t%0.3f" %
                    #               (np.sum(total_eval_cost) /
                    #                np.sum(total_eval_num_seqs)))
                    #         print("kpis\ttrain_acc\t%0.3f" %
                    #               (np.sum(total_eval_acc) /
                    #                np.sum(total_eval_num_seqs)))

                    # if steps % args.save_steps == 0:
                    #     save_path = args.checkpoints+"/"+"save_dir_" + str(steps)
                    #     print('save model to: ' + save_path)
                    #     fluid.dygraph.save_dygraph(model.state_dict(),
                    #                                save_path)
                    # fluid.dygraph.save_dygraph(model.state_dict(),
                    #  save_path)
        if args.ce:
            card_num = get_cards()
            _acc = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _acc = ce_infor[-1]
            except IndexError:  # no steps were logged
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_acc_card%s\t%f" % (card_num, _acc))