Example #1
    def eval(data):
        # when evaluating, the batch_size is set to 1
        eval_data_iter = reader.get_data_iter(data, config.batch_size,
                                              config.num_steps)
        total_loss = 0.0
        iters = 0
        init_hidden, init_cell = generate_init_data()
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed = prepare_input(
                batch, init_hidden, init_cell, epoch_id=0, with_lr=False)
            fetch_outs = exe.run(
                program=inference_program,
                feed=input_data_feed,
                fetch_list=[loss.name, last_hidden.name, last_cell.name],
                use_program_cache=False)

            cost_eval = np.array(fetch_outs[0])
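            # the final state fetched here seeds the next batch's initial LSTM state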
            init_hidden = np.array(fetch_outs[1])
            init_cell = np.array(fetch_outs[2])

            total_loss += cost_eval
            iters += config.num_steps

        ppl = np.exp(total_loss / iters)
        return ppl
Example #2
    def eval(data):
        # when evaluating, the batch_size is set to 1
        eval_data_iter = reader.get_data_iter(data, batch_size, num_steps)
        total_loss = 0.0
        iters = 0
        init_hidden = np.zeros((num_layers, batch_size, hidden_size),
                               dtype='float32')
        init_cell = np.zeros((num_layers, batch_size, hidden_size),
                             dtype='float32')
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden,
                                            init_cell,
                                            epoch_id=0,
                                            with_lr=False)
            fetch_outs = exe.run(
                inference_program,
                feed=input_data_feed,
                fetch_list=[loss.name, last_hidden.name, last_cell.name],
                use_program_cache=True)

            cost_train = np.array(fetch_outs[0])
            init_hidden = np.array(fetch_outs[1])
            init_cell = np.array(fetch_outs[2])

            total_loss += cost_train
            iters += num_steps

        ppl = np.exp(total_loss / iters)
        return ppl
Example #3
        def eval(model, data):
            print("begin to eval")
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                        dtype='float32')
            init_cell_data = np.zeros((num_layers, batch_size, hidden_size),
                                      dtype='float32')

            model.eval()
            train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
            for batch_id, batch in enumerate(train_data_iter):
                x_data, y_data = batch
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                out_loss = dy_loss.numpy()

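                # .numpy() detaches the final state from the graph; the next batch starts from plain arrays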
                init_hidden_data = last_hidden.numpy()
                init_cell_data = last_cell.numpy()

                total_loss += out_loss
                iters += num_steps

            print("eval finished")
            ppl = np.exp(total_loss / iters)
            print("ppl ", batch_id, ppl[0])
            if args.ce:
                print("kpis\ttest_ppl\t%0.3f" % ppl[0])
Example #4
        def eval(model, data):
            print("begin to eval")
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                        dtype='float32')
            init_cell_data = np.zeros((num_layers, batch_size, hidden_size),
                                      dtype='float32')

            model.eval()
            train_data_iter = reader_decorator(
                reader.get_data_iter(data, batch_size, num_steps))

            eval_data_loader = fluid.io.DataLoader.from_generator(capacity=200)
            eval_data_loader.set_batch_generator(train_data_iter, places=place)

            for batch_id, batch in enumerate(eval_data_loader):
                x, y = batch
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                out_loss = dy_loss.numpy()

                init_hidden_data = last_hidden.numpy()
                init_cell_data = last_cell.numpy()

                total_loss += out_loss
                iters += num_steps

            print("eval finished")
            ppl = np.exp(total_loss / iters)
            print("ppl ", batch_id, ppl[0])
Example #5
 def data_gen():
     data_iter_size = config.batch_size
     train_batches = reader.get_data_iter(train_data, data_iter_size,
                                          config.num_steps)
     for batch in train_batches:
         x, y = batch
         x = x.reshape((-1, config.num_steps, 1))
         y = y.reshape((-1, 1))
         yield x, y
Example #6
 def data_gen():
     data_iter_size = batch_size // device_count
     train_batches = reader.get_data_iter(train_data,
                                          data_iter_size, num_steps)
     for batch in train_batches:
         x, y = batch
         x = x.reshape((-1, num_steps, 1))
         y = y.reshape((-1, 1))
         yield x, y
Example #7
def fetch_loss_grad(x):
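    # note: the function returns inside the loop, so only the first batch's loss and grad are computed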
    data_iter = reader.get_data_iter(x, 1)
    for batch_id, batch in enumerate(data_iter):
        input_data_feed = prepare_input(batch)

        result = exe.run(test_program,
                         fetch_list=fetch_list,
                         feed=input_data_feed)

        loss = result[0]
        grad = result[1]

        return loss, grad
Example #8
        def eval(data, epoch_id=0):
            model.eval()
            eval_data_iter = reader.get_data_iter(data, batch_size, mode='eval')
            total_loss = 0.0
            word_count = 0.0
            for batch_id, batch in enumerate(eval_data_iter):
                input_data_feed, word_num = prepare_input(
                    batch, epoch_id)
                loss = model(input_data_feed)

                total_loss += loss * batch_size
                word_count += word_num
            ppl = np.exp(total_loss.numpy() / word_count)
            model.train()
            return ppl
Example #9
def train():
    startup_program = fluid.default_startup_program()
    main_program = fluid.default_main_program()

    raw_data = reader.raw_data('fra.txt', num_samples=num_samples)
    train_data = raw_data[0]
    data_vars = raw_data[1]

    model = BaseModel(hidden_size=latent_dim,
                      src_vocab_size=data_vars['num_encoder_tokens'],
                      tar_vocab_size=data_vars['num_decoder_tokens'],
                      batch_size=batch_size,
                      batch_first=True)

    loss = model.build_graph()

    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)

    ce_ppl = []
    for epoch_id in range(num_epochs):
        print("epoch ", epoch_id)

        train_data_iter = reader.get_data_iter(train_data, batch_size)

        total_loss = 0
        word_count = 0.0
        for batch_id, batch in enumerate(train_data_iter):

            input_data_feed, word_num = prepare_input(batch, epoch_id=epoch_id)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])

            total_loss += cost_train * batch_size
            word_count += word_num

            if batch_id > 0 and batch_id % batch_size == 0:
                print("  ppl", batch_id, np.exp(total_loss / word_count))
                ce_ppl.append(np.exp(total_loss / word_count))
                total_loss = 0.0
                word_count = 0.0
Example #10
    def train_an_epoch(epoch_id, batch_times):
        # get train epoch size
        log_interval = get_log_interval(len(train_data))
        train_data_iter = reader.get_data_iter(
            train_data, config.batch_size * device_count, config.num_steps)

        total_loss = 0
        iters = 0

        init_hidden, init_cell = generate_init_data()
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden=init_hidden,
                                            init_cell=init_cell,
                                            epoch_id=epoch_id,
                                            with_lr=True,
                                            device_count=device_count)
            batch_start_time = time.time()
            fetch_outs = exe.run(train_program,
                                 feed=input_data_feed,
                                 fetch_list=[
                                     loss.name, "learning_rate",
                                     last_hidden.name, last_cell.name
                                 ],
                                 use_program_cache=True)
            batch_time = time.time() - batch_start_time
            batch_times.append(batch_time)

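            # unpack the fetched loss, learning rate, and final LSTM state; the state seeds the next batch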
            cost_train = np.array(fetch_outs[0])
            lr = np.array(fetch_outs[1])
            init_hidden = np.array(fetch_outs[2])
            init_cell = np.array(fetch_outs[3])
            total_loss += cost_train
            iters += config.num_steps
            if batch_id > 0 and batch_id % log_interval == 0:
                ppl = np.exp(total_loss / iters)
                print(
                    "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f"
                    % (epoch_id, batch_id, batch_time, ppl[0], lr[0]))

            # profiler tools for benchmark
            if args.profile and batch_id == log_interval:
                profiler.reset_profiler()
            elif args.profile and batch_id == (log_interval + 5):
                break
        ppl = np.exp(total_loss / iters)
        return ppl
Example #11
    def train_an_epoch(epoch_id, batch_times):
        # get train epoch size
        log_interval = get_log_interval(len(train_data), batch_size)
        train_data_iter = reader.get_data_iter(train_data, batch_size,
                                               num_steps)

        total_loss = 0
        iters = 0
        for batch_id, batch in enumerate(train_data_iter):
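            # feed an explicit initial state only on the first batch; later batches pass None
            # (presumably so the state persisted inside the program is reused)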
            if batch_id == 0:
                init_hidden, init_cell = get_init_data()
            else:
                init_hidden = None
                init_cell = None
            input_data_feed = prepare_input(batch,
                                            init_hidden=init_hidden,
                                            init_cell=init_cell,
                                            epoch_id=epoch_id,
                                            device_count=device_count)

            batch_start_time = time.time()
            fetch_outs = exe.run(train_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name, "learning_rate"],
                                 use_program_cache=True)
            batch_time = time.time() - batch_start_time
            batch_times.append(batch_time)

            cost_train = np.array(fetch_outs[0])
            lr = np.array(fetch_outs[1])

            total_loss += cost_train
            iters += num_steps
            if batch_id > 0 and batch_id % log_interval == 0:
                ppl = np.exp(total_loss / iters)
                print(
                    "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f"
                    % (epoch_id, batch_id, batch_time, ppl[0], lr[0]))

            if args.profile:
                if batch_id == 1:
                    profiler.reset_profiler()
                elif batch_id >= 11:
                    break

        ppl = np.exp(total_loss / iters)
        return ppl
Example #12
    def eval(sess, data):
        if args.inference_only:
            sess.run(init)

        batch_times = []
        start_time = time.time()

        eval_loss = 0.0
        eval_iters = 0
        eval_data_iter = reader.get_data_iter(data, batch_size, num_steps)

        init_h = np.zeros((num_layers, batch_size, hidden_size),
                          dtype='float32')
        init_c = np.zeros((num_layers, batch_size, hidden_size),
                          dtype='float32')
        for batch in eval_data_iter:
            x, y = batch
            feed_dict = {}
            feed_dict[feeding_list[0]] = x
            feed_dict[feeding_list[1]] = y
            feed_dict[feeding_list[2]] = init_h
            feed_dict[feeding_list[3]] = init_c

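            # run one eval step; the returned final states (final_h, final_c) seed the next batch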
            batch_start_time = time.time()
            output = sess.run([cost, final_h, final_c], feed_dict)
            batch_times.append(time.time() - batch_start_time)

            train_cost = output[0]
            init_h = output[1]
            init_c = output[2]

            eval_loss += train_cost
            eval_iters += num_steps

        ppl = np.exp(eval_loss / eval_iters)

        eval_time_total = time.time() - start_time
        eval_time_run = np.sum(batch_times)

        if args.inference_only:
            print(
                "Eval batch_size: %d; Time (total): %.5f s; Time (only run): %.5f s; ppl: %.5f"
                % (batch_size, eval_time_total, eval_time_run, ppl))

        return ppl, eval_time_total
Example #13
    def eval(model, data):
        print("begin to eval")
        total_loss = 0.0
        iters = 0.0
        init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                    dtype='float32')

        model.eval()
        train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
        init_hidden = paddle.to_tensor(data=init_hidden_data,
                                       dtype=None,
                                       place=None,
                                       stop_gradient=True)
        accum_num_recall = 0.0
        for batch_id, batch in enumerate(train_data_iter):
            x_data, y_data = batch
            x_data = x_data.reshape((-1, num_steps, 1))
            y_data = y_data.reshape((-1, num_steps, 1))
            x = paddle.to_tensor(data=x_data,
                                 dtype=None,
                                 place=None,
                                 stop_gradient=True)
            y = paddle.to_tensor(data=y_data,
                                 dtype=None,
                                 place=None,
                                 stop_gradient=True)
            dy_loss, last_hidden, acc = ptb_model(x, y, init_hidden)

            out_loss = dy_loss.numpy()
            acc_ = acc.numpy()[0]
            accum_num_recall += acc_
            if batch_id % 1 == 0:
                print("batch_id:%d  recall@20:%.4f" %
                      (batch_id, accum_num_recall / (batch_id + 1)))

            init_hidden = last_hidden

            total_loss += out_loss
            iters += num_steps

        print("eval finished")
        ppl = np.exp(total_loss / iters)
        print("recall@20 ", accum_num_recall / (batch_id + 1))
        if args.ce:
            print("kpis\ttest_ppl\t%0.3f" % ppl[0])
Example #14
            def data_gen():
                data_iter_size = config.batch_size // device_count
                train_batches = reader.get_data_iter(train_data, data_iter_size,
                                                     config.num_steps)
                for batch in train_batches:
                    x, y = batch
                    x = x.reshape((-1, config.num_steps, 1))
                    y = y.reshape((-1, 1))
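                    # for the LoD model, flatten to (batch * num_steps, 1) and mark a sequence boundary every num_steps tokens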
                    if args.rnn_model == "lod":
                        x = to_lodtensor(x.reshape((-1, 1)), place, [
                            range(0, (data_iter_size + 1) * config.num_steps,
                                  config.num_steps)
                        ])
                        y = to_lodtensor(y.reshape((-1, 1)), place, [
                            range(0, (data_iter_size + 1) * config.num_steps,
                                  config.num_steps)
                        ])

                    yield x, y
Example #15
    def eval(data, epoch_id=0):
        eval_data_iter = reader.get_data_iter(data, batch_size, mode='eval')
        total_loss = 0.0
        word_count = 0.0
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed, word_num = prepare_input(batch,
                                                      epoch_id,
                                                      with_lr=False)
            fetch_outs = exe.run(inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])

            total_loss += cost_train * batch_size
            word_count += word_num

        ppl = np.exp(total_loss / word_count)

        return ppl
Example #16
    def eval(data):
        eval_data_iter = reader.get_data_iter(data, batch_size, mode='eval')
        total_loss = 0.0
        word_count = 0.0
        batch_count = 0.0
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed, src_word_num, dec_word_sum = prepare_input(batch)
            fetch_outs = exe.run(inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])

            total_loss += cost_train * batch_size
            word_count += dec_word_sum
            batch_count += batch_size

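        # nll averages the loss per example; ppl exponentiates the per-word average loss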
        nll = total_loss / batch_count
        ppl = np.exp(total_loss / word_count)

        return nll, ppl
Example #17
    def train():
        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        kl_w = args.kl_start
        lr_w = args.learning_rate
        best_valid_nll = 1e100  # +inf
        best_epoch_id = -1
        decay_cnt = 0
        max_decay = args.max_decay
        decay_factor = 0.5
        decay_ts = 2
        steps_not_improved = 0
        for epoch_id in range(max_epoch):
            start_time = time.time()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size,
                                                       args.sort_cache,
                                                       args.cache_num,
                                                       enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size,
                                                       args.sort_cache,
                                                       args.cache_num)

            total_loss = 0
            total_rec_loss = 0
            total_kl_loss = 0
            word_count = 0.0
            batch_count = 0.0
            batch_times = []
            for batch_id, batch in enumerate(train_data_iter):
                batch_start_time = time.time()
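                # linearly anneal the KL weight toward 1.0, adding anneal_r per batch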
                kl_w = min(1.0, kl_w + anneal_r)
                kl_weight = kl_w
                input_data_feed, src_word_num, dec_word_sum = prepare_input(
                    batch, kl_weight, lr_w)
                fetch_outs = exe.run(
                    program=train_program,
                    feed=input_data_feed,
                    fetch_list=[loss.name, kl_loss.name, rec_loss.name],
                    use_program_cache=False)

                cost_train = np.array(fetch_outs[0])
                kl_cost_train = np.array(fetch_outs[1])
                rec_cost_train = np.array(fetch_outs[2])

                total_loss += cost_train * batch_size
                total_rec_loss += rec_cost_train * batch_size
                total_kl_loss += kl_cost_train * batch_size
                word_count += dec_word_sum
                batch_count += batch_size
                batch_end_time = time.time()
                batch_time = batch_end_time - batch_start_time
                batch_times.append(batch_time)

                if batch_id > 0 and batch_id % 200 == 0:
                    print("-- Epoch:[%d]; Batch:[%d]; Time: %.4f s; "
                          "kl_weight: %.4f; kl_loss: %.4f; rec_loss: %.4f; "
                          "nll: %.4f; ppl: %.4f" %
                          (epoch_id, batch_id, batch_time, kl_w,
                           total_kl_loss / batch_count, total_rec_loss /
                           batch_count, total_loss / batch_count,
                           np.exp(total_loss / word_count)))
                    ce_ppl.append(np.exp(total_loss / word_count))

            end_time = time.time()
            epoch_time = end_time - start_time
            ce_time.append(epoch_time)
            print(
                "\nTrain epoch:[%d]; Epoch Time: %.4f; avg_time: %.4f s/step\n"
                % (epoch_id, epoch_time, sum(batch_times) / len(batch_times)))

            val_nll, val_ppl = eval(valid_data)
            print("dev ppl", val_ppl)
            test_nll, test_ppl = eval(test_data)
            print("test ppl", test_ppl)

            if val_nll < best_valid_nll:
                best_valid_nll = val_nll
                steps_not_improved = 0
                best_nll = test_nll
                best_ppl = test_ppl
                best_epoch_id = epoch_id
                save_path = os.path.join(args.model_path,
                                         "epoch_" + str(best_epoch_id),
                                         "checkpoint")
                print("save model {}".format(save_path))
                fluid.save(main_program, save_path)
            else:
                steps_not_improved += 1
                if steps_not_improved == decay_ts:
                    old_lr = lr_w
                    lr_w *= decay_factor
                    steps_not_improved = 0
                    new_lr = lr_w

                    print('-----\nchange lr, old lr: %f, new lr: %f\n-----' %
                          (old_lr, new_lr))

                    dir_name = args.model_path + "/epoch_" + str(best_epoch_id)
                    fluid.load(main_program, dir_name, exe)

                    decay_cnt += 1
                    if decay_cnt == max_decay:
                        break

        print('\nbest testing nll: %.4f, best testing ppl %.4f\n' %
              (best_nll, best_ppl))

        if args.enable_ce:
            card_num = get_cards()
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except IndexError:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))
Example #18
def train_ptb_lm():

    args = parse_args()
    model_type = args.model_type

    vocab_size = 10000
    if model_type == "test":
        num_layers = 1
        batch_size = 2
        hidden_size = 10
        num_steps = 3
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 1
        max_epoch = 1
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "small":
        num_layers = 2
        batch_size = 20
        hidden_size = 200
        num_steps = 20
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 4
        max_epoch = 13
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "medium":
        num_layers = 2
        batch_size = 20
        hidden_size = 650
        num_steps = 35
        init_scale = 0.05
        max_grad_norm = 5.0
        epoch_start_decay = 6
        max_epoch = 39
        dropout = 0.5
        lr_decay = 0.8
        base_learning_rate = 1.0
    elif model_type == "large":
        num_layers = 2
        batch_size = 20
        hidden_size = 1500
        num_steps = 35
        init_scale = 0.04
        max_grad_norm = 10.0
        epoch_start_decay = 14
        max_epoch = 55
        dropout = 0.65
        lr_decay = 1.0 / 1.15
        base_learning_rate = 1.0
    else:
        print("model type not supported")
        return

    with fluid.dygraph.guard(core.CUDAPlace(0)):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            max_epoch = 1
        ptb_model = PtbModel("ptb_model",
                             hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale,
                             dropout=dropout)

        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None

        data_path = args.data_path
        print("begin to load data")
        ptb_data = reader.get_ptb_data(data_path)
        print("finished load data")
        train_data, valid_data, test_data = ptb_data

        batch_len = len(train_data) // batch_size
        total_batch_size = (batch_len - 1) // num_steps
        log_interval = total_batch_size // 20

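        # piecewise-constant lr schedule: hold base_learning_rate, then decay by lr_decay per epoch from epoch_start_decay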
        bd = []
        lr_arr = [1.0]
        for i in range(1, max_epoch):
            bd.append(total_batch_size * i)
            new_lr = base_learning_rate * (lr_decay**max(
                i + 1 - epoch_start_decay, 0.0))
            lr_arr.append(new_lr)

        sgd = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=lr_arr))

        def eval(model, data):
            print("begin to eval")
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                        dtype='float32')
            init_cell_data = np.zeros((num_layers, batch_size, hidden_size),
                                      dtype='float32')

            model.eval()
            train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
            for batch_id, batch in enumerate(train_data_iter):
                x_data, y_data = batch
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                out_loss = dy_loss.numpy()

                init_hidden_data = last_hidden.numpy()
                init_cell_data = last_cell.numpy()

                total_loss += out_loss
                iters += num_steps

            print("eval finished")
            ppl = np.exp(total_loss / iters)
            print("ppl ", batch_id, ppl[0])
            if args.ce:
                print("kpis\ttest_ppl\t%0.3f" % ppl[0])

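        # clip gradients by their global norm during each optimizer step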
        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(max_grad_norm)
        for epoch_id in range(max_epoch):
            ptb_model.train()
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                        dtype='float32')
            init_cell_data = np.zeros((num_layers, batch_size, hidden_size),
                                      dtype='float32')

            train_data_iter = reader.get_data_iter(train_data, batch_size,
                                                   num_steps)

            start_time = time.time()
            for batch_id, batch in enumerate(train_data_iter):
                x_data, y_data = batch
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                out_loss = dy_loss.numpy()

                init_hidden_data = last_hidden.numpy()
                init_cell_data = last_cell.numpy()
                dy_loss.backward()
                sgd.minimize(dy_loss, grad_clip=grad_clip)

                ptb_model.clear_gradients()
                total_loss += out_loss
                iters += num_steps

                if batch_id > 0 and batch_id % log_interval == 0:
                    ppl = np.exp(total_loss / iters)
                    print(epoch_id, "ppl ", batch_id, ppl[0],
                          sgd._global_learning_rate().numpy())

            print("one epoch finished", epoch_id)
            print("time cost ", time.time() - start_time)
            ppl = np.exp(total_loss / iters)
            print("ppl ", epoch_id, ppl[0])
            if args.ce:
                print("kpis\ttrain_ppl\t%0.3f" % ppl[0])

        eval(ptb_model, test_data)
Example #19
def train_ptb_lm():
    args = parse_args()

    # check if set use_gpu=True in paddlepaddle cpu version
    model_check.check_cuda(args.use_gpu)
    # check if paddlepaddle version is satisfied
    model_check.check_version()

    model_type = args.model_type

    vocab_size = 37484
    if model_type == "test":
        num_layers = 1
        batch_size = 2
        hidden_size = 10
        num_steps = 4
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 1
        max_epoch = 1
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "small":
        num_layers = 2
        batch_size = 20
        hidden_size = 200
        num_steps = 20
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 4
        max_epoch = 2
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "gru4rec":
        num_layers = 1
        batch_size = 500
        hidden_size = 100
        num_steps = 10
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 10
        max_epoch = 5
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 0.05
    elif model_type == "medium":
        num_layers = 2
        batch_size = 20
        hidden_size = 650
        num_steps = 35
        init_scale = 0.05
        max_grad_norm = 5.0
        epoch_start_decay = 6
        max_epoch = 39
        dropout = 0.5
        lr_decay = 0.8
        base_learning_rate = 1.0
    elif model_type == "large":
        num_layers = 2
        batch_size = 20
        hidden_size = 1500
        num_steps = 35
        init_scale = 0.04
        max_grad_norm = 10.0
        epoch_start_decay = 14
        max_epoch = 55
        dropout = 0.65
        lr_decay = 1.0 / 1.15
        base_learning_rate = 1.0
    else:
        print("model type not supported")
        return

    with fluid.dygraph.guard(core.CUDAPlace(0)):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            max_epoch = 1
        ptb_model = PtbModel("ptb_model",
                             hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale,
                             dropout=dropout)

        if args.init_from_pretrain_model:
            if not os.path.exists(args.init_from_pretrain_model + '.pdparams'):
                print(args.init_from_pretrain_model)
                raise ValueError("The pretrained params do not exist.")
            # load_dygraph returns (param_dict, optimizer_dict); apply the params to the model
            param_dict, _ = fluid.load_dygraph(args.init_from_pretrain_model)
            ptb_model.set_dict(param_dict)
            print("finished initializing model from pretrained params at %s" %
                  (args.init_from_pretrain_model))

        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None

        data_path = args.data_path
        print("begin to load data")
        ptb_data = reader.get_ptb_data(data_path)
        print("finished load data")
        train_data, valid_data, test_data = ptb_data

        batch_len = len(train_data) // batch_size
        total_batch_size = (batch_len - 1) // num_steps
        print("total_batch_size:", total_batch_size)
        log_interval = total_batch_size // 20

        bd = []
        lr_arr = [base_learning_rate]
        for i in range(1, max_epoch):
            bd.append(total_batch_size * i)
            new_lr = base_learning_rate * (lr_decay**max(
                i + 1 - epoch_start_decay, 0.0))
            lr_arr.append(new_lr)

        sgd = AdagradOptimizer(parameter_list=ptb_model.parameters(),
                               learning_rate=fluid.layers.piecewise_decay(
                                   boundaries=bd, values=lr_arr))

        print("parameters:--------------------------------")
        for para in ptb_model.parameters():
            print(para.name)
        print("parameters:--------------------------------")

        def eval(model, data):
            print("begin to eval")
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                        dtype='float32')

            model.eval()
            train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
            init_hidden = to_variable(init_hidden_data)
            accum_num_recall = 0.0
            for batch_id, batch in enumerate(train_data_iter):
                x_data, y_data = batch
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, num_steps, 1))
                x = to_variable(x_data)
                y = to_variable(y_data)
                dy_loss, last_hidden, acc = ptb_model(x, y, init_hidden)

                out_loss = dy_loss.numpy()
                acc_ = acc.numpy()[0]
                accum_num_recall += acc_
                if batch_id % 1 == 0:
                    print("batch_id:%d  recall@20:%.4f" %
                          (batch_id, accum_num_recall / (batch_id + 1)))

                init_hidden = last_hidden

                total_loss += out_loss
                iters += num_steps

            print("eval finished")
            ppl = np.exp(total_loss / iters)
            print("recall@20 ", accum_num_recall / (batch_id + 1))
            if args.ce:
                print("kpis\ttest_ppl\t%0.3f" % ppl[0])

        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(max_grad_norm)
        for epoch_id in range(max_epoch):
            ptb_model.train()
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                        dtype='float32')

            train_data_iter = reader.get_data_iter(train_data, batch_size,
                                                   num_steps)
            init_hidden = to_variable(init_hidden_data)

            start_time = time.time()
            for batch_id, batch in enumerate(train_data_iter):
                x_data, y_data = batch
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, num_steps, 1))
                x = to_variable(x_data)
                y = to_variable(y_data)
                dy_loss, last_hidden, acc = ptb_model(x, y, init_hidden)

                out_loss = dy_loss.numpy()
                acc_ = acc.numpy()[0]

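                # carry the final hidden state forward into the next batch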
                init_hidden = last_hidden
                dy_loss.backward()
                sgd.minimize(dy_loss, grad_clip=grad_clip)
                ptb_model.clear_gradients()
                total_loss += out_loss
                iters += num_steps

                if batch_id > 0 and batch_id % 100 == 1:
                    ppl = np.exp(total_loss / iters)
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, acc: %.5f, lr: %.5f"
                        % (epoch_id, batch_id, ppl[0], acc_,
                           sgd._global_learning_rate().numpy()))

            print("one epoch finished", epoch_id)
            print("time cost ", time.time() - start_time)
            ppl = np.exp(total_loss / iters)
            print("-- Epoch:[%d]; ppl: %.5f" % (epoch_id, ppl[0]))
            if args.ce:
                print("kpis\ttrain_ppl\t%0.3f" % ppl[0])
            save_model_dir = os.path.join(args.save_model_dir, str(epoch_id),
                                          'params')
            fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
            print("Saved model to: %s.\n" % save_model_dir)
            eval(ptb_model, test_data)
Example #20
    def train(sess):
        sess.run(init)

        if args.profile:
            profiler_step = 0
            profiler = model_analyzer.Profiler(graph=sess.graph)
            run_options = tf.RunOptions(trace_level = tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
        
        total_time = 0.0
        epoch_times = []
        
        for epoch_id in range(max_epoch):
            batch_times = []
            epoch_start_time = time.time()
            train_data_iter = reader.get_data_iter(train_data, batch_size, num_steps)

            # assign lr, update the learning rate
            new_lr_1 = base_learning_rate * (lr_decay ** max(epoch_id + 1 - epoch_start_decay, 0.0))
            sess.run(lr_update, {new_lr: new_lr_1})
        
            total_loss = 0.0
            iters = 0
            batch_len = len(train_data) // batch_size
            epoch_size = (batch_len - 1) // num_steps

            if args.profile:
                log_fre = 1
            else:
                log_fre = epoch_size // 10
        
            init_h = np.zeros((num_layers, batch_size, hidden_size), dtype='float32')
            init_c = np.zeros((num_layers, batch_size, hidden_size), dtype='float32')
        
            count = 0.0
            for batch_id, batch in enumerate(train_data_iter):
                x, y = batch
                feed_dict = {}
                feed_dict[feeding_list[0]] = x
                feed_dict[feeding_list[1]] = y
                feed_dict[feeding_list[2]] = init_h
                feed_dict[feeding_list[3]] = init_c
        
                batch_start_time = time.time()
                if args.profile:
                    output = sess.run([cost, final_h, final_c, train_op], feed_dict, options=run_options, run_metadata=run_metadata)
                    profiler.add_step(step=profiler_step, run_meta=run_metadata)
                    profiler_step = profiler_step + 1
                    if batch_id >= 10:
                        break
                else:
                    output = sess.run([cost, final_h, final_c, train_op], feed_dict)
                batch_time = time.time() - batch_start_time
                batch_times.append(batch_time)
        
                train_cost = output[0]
                init_h = output[1]
                init_c = output[2]
        
                total_loss += train_cost
                iters += num_steps
                count = count + 1
                if batch_id > 0 and batch_id % log_fre == 0:
                    ppl = np.exp(total_loss / iters)
                    print("-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f" % (epoch_id, batch_id, batch_time, ppl, new_lr_1))
        
            ppl = np.exp(total_loss / iters)
            epoch_time = time.time() - epoch_start_time
            epoch_times.append(epoch_time)
            total_time += epoch_time
        
            print("\nTrain epoch:[%d]; epoch Time: %.5f s; ppl: %.5f; speed: %.5f steps/s\n"
                  % (epoch_id, epoch_time, ppl, (batch_id + 1) / sum(batch_times)))

            valid_ppl, _ = eval(sess, valid_data)
            print("Valid ppl: %.5f" % valid_ppl)
    
        test_ppl, test_time = eval(sess, test_data)
        print("Test Time (total): %.5f, ppl: %.5f" % (test_time, test_ppl))
              
        if args.profile:
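            # report per-op time (micros) and occurrence counts from the collected trace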
            profile_op_opt_builder = option_builder.ProfileOptionBuilder()
            profile_op_opt_builder.select(['micros','occurrence'])
            profile_op_opt_builder.order_by('micros')
            profile_op_opt_builder.with_max_depth(50)
            profiler.profile_operations(profile_op_opt_builder.build())
Example #21
def main():
    args = parse_args()
    print(args)
    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        #args.enable_ce = True
        if args.enable_ce:
            fluid.default_startup_program().random_seed = 102
            fluid.default_main_program().random_seed = 102
            np.random.seed(102)
            random.seed(102)

        # Training process

        if args.attention:
            model = AttentionModel(hidden_size,
                                   src_vocab_size,
                                   tar_vocab_size,
                                   batch_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale,
                                   dropout=dropout)
        else:
            model = BaseModel(hidden_size,
                              src_vocab_size,
                              tar_vocab_size,
                              batch_size,
                              num_layers=num_layers,
                              init_scale=init_scale,
                              dropout=dropout)
        global_norm_clip = GradientClipByGlobalNorm(max_grad_norm)
        lr = args.learning_rate
        opt_type = args.optimizer
        if opt_type == "sgd":
            optimizer = fluid.optimizer.SGD(lr,
                                            parameter_list=model.parameters(),
                                            grad_clip=global_norm_clip)
        elif opt_type == "adam":
            optimizer = fluid.optimizer.Adam(lr,
                                             parameter_list=model.parameters(),
                                             grad_clip=global_norm_clip)
        else:
            print("only [sgd|adam] optimizers are supported")
            raise Exception("optimizer type not supported")

        train_data_prefix = args.train_data_prefix
        eval_data_prefix = args.eval_data_prefix
        test_data_prefix = args.test_data_prefix
        vocab_prefix = args.vocab_prefix
        src_lang = args.src_lang
        tar_lang = args.tar_lang
        print("begin to load data")
        raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                                   train_data_prefix, eval_data_prefix,
                                   test_data_prefix, args.max_len)
        print("finished load data")
        train_data, valid_data, test_data, _ = raw_data

        def prepare_input(batch, epoch_id=0):
            src_ids, src_mask, tar_ids, tar_mask = batch
            res = {}
            src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
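            # teacher forcing: the decoder input drops the last target token; the labels drop the first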
            in_tar = tar_ids[:, :-1]
            label_tar = tar_ids[:, 1:]

            in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
            label_tar = label_tar.reshape(
                (label_tar.shape[0], label_tar.shape[1], 1))
            inputs = [src_ids, in_tar, label_tar, src_mask, tar_mask]
            return inputs, np.sum(tar_mask)

        # evaluate perplexity over a dataset
        def eval(data, epoch_id=0):
            model.eval()
            eval_data_iter = reader.get_data_iter(data,
                                                  batch_size,
                                                  mode='eval')
            total_loss = 0.0
            word_count = 0.0
            for batch_id, batch in enumerate(eval_data_iter):
                input_data_feed, word_num = prepare_input(batch, epoch_id)
                loss = model(input_data_feed)

                total_loss += loss * batch_size
                word_count += word_num
            ppl = np.exp(total_loss.numpy() / word_count)
            model.train()
            return ppl

        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            epoch_start = time.time()

            model.train()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size,
                                                       enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            total_reader_cost = 0.0
            interval_time_start = time.time()

            batch_start = time.time()
            for batch_id, batch in enumerate(train_data_iter):
                batch_reader_end = time.time()
                total_reader_cost += batch_reader_end - batch_start

                input_data_feed, word_num = prepare_input(batch,
                                                          epoch_id=epoch_id)
                word_count += word_num
                loss = model(input_data_feed)
                loss.backward()
                optimizer.minimize(loss)
                model.clear_gradients()
                total_loss += loss * batch_size
                total_loss_value = total_loss.numpy()

                batch_times.append(time.time() - batch_start)
                if batch_id > 0 and batch_id % 100 == 0:
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, batch_cost: %.5f sec, reader_cost: %.5f sec, ips: %.5f words/sec"
                        % (epoch_id, batch_id,
                           np.exp(total_loss_value / word_count),
                           (time.time() - interval_time_start) / 100,
                           total_reader_cost / 100, word_count /
                           (time.time() - interval_time_start)))
                    ce_ppl.append(np.exp(total_loss_value / word_count))
                    total_loss = 0.0
                    word_count = 0.0
                    total_reader_cost = 0.0
                    interval_time_start = time.time()
                batch_start = time.time()

            train_epoch_cost = time.time() - epoch_start
            print(
                "\nTrain epoch:[%d]; epoch_cost: %.5f sec; avg_batch_cost: %.5f s/step\n"
                % (epoch_id, train_epoch_cost,
                   sum(batch_times) / len(batch_times)))
            ce_time.append(train_epoch_cost)

            dir_name = os.path.join(args.model_path, "epoch_" + str(epoch_id))
            print("begin to save", dir_name)
            paddle.fluid.save_dygraph(model.state_dict(), dir_name)
            print("save finished")
            dev_ppl = eval(valid_data)
            print("dev ppl", dev_ppl)
            test_ppl = eval(test_data)
            print("test ppl", test_ppl)

        if args.enable_ce:
            card_num = get_cards()
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except IndexError:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))
Example #22
    def eval(data):
        if args.inference_only and args.init_params_path:
            dirname = args.init_params_path
            filename = None
            if not os.path.isdir(args.init_params_path):
                dirname = os.path.dirname(args.init_params_path)
                filename = os.path.basename(args.init_params_path)
            fluid.io.load_persistables(exe,
                                       dirname,
                                       main_program=main_program,
                                       filename=filename)
            print("Load parameters from: %s." % args.init_params_path)

        batch_times = []
        start_time = time.time()
        # when evaluating, the batch_size is set to 1
        eval_data_iter = reader.get_data_iter(data, batch_size, num_steps)
        total_loss = 0.0
        iters = 0
        init_hidden = np.zeros((num_layers, batch_size, hidden_size),
                               dtype='float32')
        init_cell = np.zeros((num_layers, batch_size, hidden_size),
                             dtype='float32')
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden,
                                            init_cell,
                                            epoch_id=0,
                                            with_lr=False)

            batch_start_time = time.time()
            # evaluation must not run gradient ops or update parameters,
            # so run the inference program through the Executor
            fetch_outs = exe.run(
                program=inference_program,
                feed=input_data_feed,
                fetch_list=[loss.name, last_hidden.name, last_cell.name],
                use_program_cache=True)
            batch_times.append(time.time() - batch_start_time)

            cost_train = np.array(fetch_outs[0])
            init_hidden = np.array(fetch_outs[1])
            init_cell = np.array(fetch_outs[2])

            total_loss += cost_train
            iters += num_steps

        ppl = np.exp(total_loss / iters)

        eval_time_total = time.time() - start_time
        eval_time_run = np.sum(batch_times)

        # Benchmark
        if args.inference_only:
            print("\n======== Benchmark Result ========")
            print(
                "Eval batch_size: %d; Time (total): %.5f s; Time (only run): %.5f s; ppl: %.5f"
                % (batch_size, eval_time_total, eval_time_run, ppl[0]))
            print("")

            # Save the inference model for C++ inference purpose
            fluid.io.save_inference_model(save_model_dir,
                                          feed_order,
                                          [loss, last_hidden, last_cell],
                                          exe,
                                          main_program=inference_program,
                                          model_filename="model",
                                          params_filename="params")
            print("Save inference model to: %s." % save_model_dir)

        return ppl
Example #23
    def train_an_epoch(epoch_id, batch_times):
        # get train epoch size
        num_batches = len(train_data) // batch_size
        epoch_size = (num_batches - 1) // num_steps
        if args.profile:
            log_interval = 1
        else:
            log_interval = max(1, epoch_size // 10)

        data_iter_size = batch_size
        if device_count > 1 and args.parallel:
            data_iter_size = batch_size * device_count
        train_data_iter = reader.get_data_iter(train_data, data_iter_size,
                                               num_steps)

        total_loss = 0
        iters = 0
        if device_count > 1 and args.parallel:
            init_hidden = np.zeros(
                (num_layers * device_count, batch_size, hidden_size),
                dtype='float32')
            init_cell = np.zeros(
                (num_layers * device_count, batch_size, hidden_size),
                dtype='float32')
        else:
            init_hidden = np.zeros((num_layers, batch_size, hidden_size),
                                   dtype='float32')
            init_cell = np.zeros((num_layers, batch_size, hidden_size),
                                 dtype='float32')
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden,
                                            init_cell,
                                            epoch_id=epoch_id,
                                            device_count=device_count)

            batch_start_time = time.time()
            fetch_outs = exe.run(train_program,
                                 feed=input_data_feed,
                                 fetch_list=[
                                     loss.name, last_hidden.name,
                                     last_cell.name, "learning_rate"
                                 ],
                                 use_program_cache=True)
            batch_time = time.time() - batch_start_time
            batch_times.append(batch_time)

            cost_train = np.array(fetch_outs[0])
            init_hidden = np.array(fetch_outs[1])
            init_cell = np.array(fetch_outs[2])

            lr = np.array(fetch_outs[3])

            total_loss += cost_train
            iters += num_steps
            if batch_id > 0 and batch_id % log_interval == 0:
                ppl = np.exp(total_loss / iters)
                print(
                    "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f"
                    % (epoch_id, batch_id, batch_time, ppl[0], lr[0]))

            if args.profile:
                if batch_id == 1:
                    profiler.reset_profiler()
                elif batch_id >= 11:
                    break

        ppl = np.exp(total_loss / iters)
        return ppl
Example #24
def train():

    model = BaseModel(batch_size=batch_size, maxlen=n_frames)
    loss, acc, output, no_grad_set = model.build_graph()

    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)
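    # clone the forward program for evaluation; for_test=True switches ops like dropout to inference mode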

    optimizer = fluid.optimizer.Adadelta(0.001)
    optimizer.minimize(loss, no_grad_set=no_grad_set)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    log_writer = LogWriter(log_path, sync_cycle=10)

    with log_writer.mode("train") as logger:
        log_train_loss = logger.scalar(tag="train_loss") 
        log_train_acc = logger.scalar(tag="train_acc")

    with log_writer.mode("validation") as logger:
        log_valid_loss = logger.scalar(tag="validation_loss")
        log_valid_acc = logger.scalar(tag="validation_acc")

    def prepare_input(batch):
        x, y, x_seqlen = batch
        res = {}

        res['input'] = np.array(x).astype("float32")
        res['input_seqlen'] = np.array(x_seqlen).astype("int64")
        res['label'] = np.array(y).astype("float32")

        return res

    # (samples, seq, width, height, pixel)
    noisy_movies, shifted_movies = reader.generate_movies(n_samples, n_frames)
    data = noisy_movies[:1000], shifted_movies[:1000]
    train_data, validation_data = split(data, validation_split)

    step_id = 0
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)

        valid_data_iter = reader.get_data_iter(validation_data, batch_size) 
        train_data_iter = reader.get_data_iter(train_data, batch_size) 

        # train
        total_loss = 0
        batch_id = 0
        for batch in train_data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=main_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])
            acc_train = fetch_outs[1]
            total_loss += cost_train

            if batch_id > 0 and batch_id % 5 == 0:
                avg_loss = total_loss / 5
                log_train_loss.add_record(step_id, avg_loss)
                log_train_acc.add_record(step_id, acc_train)
                step_id += 1
                print("avg loss: %.7f over last 5 batches, at batch %d" % (avg_loss, batch_id))
                total_loss = 0.0

            batch_id += 1


        # validate
        total_loss = 0
        total_acc = 0
        batch_id = 0
        for batch in valid_data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_valid = np.array(fetch_outs[0])
            acc_valid = fetch_outs[1]
            total_loss += cost_valid
            total_acc += acc_valid
            batch_id += 1

        log_valid_loss.add_record(epoch_id, total_loss)
        log_valid_acc.add_record(epoch_id, total_acc / batch_id)
        print("validation loss: %.7f" % (total_loss))

    fluid.io.save_inference_model(
        dirname=params_path,
        feeded_var_names=['input', 'input_seqlen'],
        target_vars=[loss, acc],
        main_program=inference_program,
        executor=exe)
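
Beispiel #24 calls a split helper that is not shown. A minimal sketch of what it might look like, assuming data is a (noisy_movies, shifted_movies) pair and validation_split is the held-out fraction (the names follow the example; the implementation is an assumption):

def split(data, validation_split):
    # hold out the last `validation_split` fraction of samples
    noisy, shifted = data
    n_train = int(len(noisy) * (1.0 - validation_split))
    train_data = (noisy[:n_train], shifted[:n_train])
    validation_data = (noisy[n_train:], shifted[n_train:])
    return train_data, validation_data
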
Beispiel #25
0
def train_ptb_lm():
    args = parse_args()

    # check if set use_gpu=True in paddlepaddle cpu version
    model_check.check_cuda(args.use_gpu)

    place = core.CPUPlace()
    if args.use_gpu:
        place = core.CUDAPlace(0)

    # check if paddlepaddle version is satisfied
    model_check.check_version()

    model_type = args.model_type

    vocab_size = 10000
    if model_type == "test":
        num_layers = 1
        batch_size = 2
        hidden_size = 10
        num_steps = 3
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 1
        max_epoch = 1
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "small":
        num_layers = 2
        batch_size = 20
        hidden_size = 200
        num_steps = 20
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 4
        max_epoch = 13
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "medium":
        num_layers = 2
        batch_size = 20
        hidden_size = 650
        num_steps = 35
        init_scale = 0.05
        max_grad_norm = 5.0
        epoch_start_decay = 6
        max_epoch = 39
        dropout = 0.5
        lr_decay = 0.8
        base_learning_rate = 1.0
    elif model_type == "large":
        num_layers = 2
        batch_size = 20
        hidden_size = 1500
        num_steps = 35
        init_scale = 0.04
        max_grad_norm = 10.0
        epoch_start_decay = 14
        max_epoch = 55
        dropout = 0.65
        lr_decay = 1.0 / 1.15
        base_learning_rate = 1.0
    else:
        print("model type not supported")
        return

    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            max_epoch = 1
        ptb_model = PtbModel(
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            num_layers=num_layers,
            num_steps=num_steps,
            init_scale=init_scale,
            dropout=dropout)

        if args.init_from_pretrain_model:
            if not os.path.exists(args.init_from_pretrain_model + '.pdparams'):
                print(args.init_from_pretrain_model)
                raise ValueError("The pretrained params do not exist.")
            param_dict, _ = fluid.load_dygraph(args.init_from_pretrain_model)
            ptb_model.set_dict(param_dict)
            print("finished initializing the model from pretrained params at %s" %
                  (args.init_from_pretrain_model))

        data_path = args.data_path
        print("begin to load data")
        ptb_data = reader.get_ptb_data(data_path)
        print("finished load data")
        train_data, valid_data, test_data = ptb_data

        batch_len = len(train_data) // batch_size
        epoch_size = (batch_len - 1) // num_steps
        log_interval = 200

        bd = []
        lr_arr = [base_learning_rate]
        for i in range(1, max_epoch):
            bd.append(epoch_size * i)
            new_lr = base_learning_rate * (lr_decay**
                                           max(i + 1 - epoch_start_decay, 0.0))
            lr_arr.append(new_lr)

        sgd = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=lr_arr), parameter_list=ptb_model.parameters())

        def eval(model, data):
            print("begin to eval")
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')

            model.eval()
            eval_data_iter = reader.get_data_iter(data, batch_size, num_steps)
            for batch_id, batch in enumerate(eval_data_iter):
                x_data, y_data = batch
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, num_steps, 1))
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = model(x, y, init_hidden,
                                                        init_cell)

                out_loss = dy_loss.numpy()

                init_hidden_data = last_hidden.numpy()
                init_cell_data = last_cell.numpy()

                total_loss += out_loss
                iters += num_steps

            print("eval finished")
            ppl = np.exp(total_loss / iters)
            print("ppl ", batch_id, ppl[0])
            if args.ce:
                print("kpis\ttest_ppl\t%0.3f" % ppl[0])

        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(max_grad_norm)
        
        for epoch_id in range(max_epoch):
            ptb_model.train()
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')

            train_data_iter = reader.get_data_iter(train_data, batch_size,
                                                   num_steps)
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            start_time = time.time()
            for batch_id, batch in enumerate(train_data_iter):
                x_data, y_data = batch

                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, num_steps, 1))

                x = to_variable(x_data)
                y = to_variable(y_data)

                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                            init_cell)
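                # carry the LSTM state across batches (truncated BPTT) and
                # detach it so gradients do not flow into previous batches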
                init_hidden = last_hidden
                init_cell = last_cell
                init_hidden.stop_gradient = True
                init_cell.stop_gradient = True
                out_loss = dy_loss.numpy()

                dy_loss.backward()
                sgd.minimize(dy_loss, grad_clip=grad_clip)

                ptb_model.clear_gradients()
                total_loss += out_loss
                iters += num_steps

                if batch_id > 0 and batch_id % log_interval == 0:
                    ppl = np.exp(total_loss / iters)
                    print("-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, lr: %.5f, loss: %.5f" %
                          (epoch_id, batch_id, ppl[0],
                           sgd._global_learning_rate().numpy(), out_loss))

            print("one epoch finished", epoch_id)
            print("one epoch cost %.5f s" % (time.time() - start_time))
            ppl = np.exp(total_loss / iters)
            print("-- Epoch:[%d]; ppl: %.5f" % (epoch_id, ppl[0]))

            if batch_size <= 20 and epoch_id == 0 and ppl[0] > 1000:
                # with a bad random init, ppl is still above 1000 after the
                # first epoch and the run will not recover
                print("Parameters were badly initialized (ppl > 1000 after the first epoch).")
                print("Aborting this training run; please start again.")
                return

            if args.ce:
                print("kpis\ttrain_ppl\t%0.3f" % ppl[0])
            save_model_dir = os.path.join(args.save_model_dir,
                                          str(epoch_id), 'params')
            fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
            print("Saved model to: %s.\n" % save_model_dir)

            eval(ptb_model, valid_data)

        eval(ptb_model, test_data)
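
The schedule built above holds base_learning_rate for the first epoch_start_decay epochs and then multiplies by lr_decay once per epoch; piecewise_decay switches values at the step boundaries in bd. A standalone sketch of the resulting lists (constants follow the "small" configuration; epoch_size is a made-up step count):

base_learning_rate, lr_decay, epoch_start_decay, max_epoch = 1.0, 0.5, 4, 13
epoch_size = 1000  # hypothetical steps per epoch

boundaries = [epoch_size * i for i in range(1, max_epoch)]
values = [base_learning_rate * lr_decay ** max(i + 1 - epoch_start_decay, 0.0)
          for i in range(max_epoch)]
# values == [1.0, 1.0, 1.0, 1.0, 0.5, 0.25, ...]
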
Beispiel #26
0
def train():
    args = parse_args()
    model_type = args.model_type
    rnn_model = args.rnn_model
    logger = logging.getLogger("lm")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    if args.enable_ce:
        fluid.default_startup_program().random_seed = SEED
    if args.log_path:
        file_handler = logging.FileHandler(args.log_path)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    else:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

    logger.info('Running with args : {}'.format(args))

    vocab_size = 10000
    if model_type == "test":
        num_layers = 1
        batch_size = 2
        hidden_size = 10
        num_steps = 3
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 1
        max_epoch = 1
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "small":
        num_layers = 2
        batch_size = 20
        hidden_size = 200
        num_steps = 20
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 4
        max_epoch = 13
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "medium":
        num_layers = 2
        batch_size = 20
        hidden_size = 650
        num_steps = 35
        init_scale = 0.05
        max_grad_norm = 5.0
        epoch_start_decay = 6
        max_epoch = 39
        dropout = 0.5
        lr_decay = 0.8
        base_learning_rate = 1.0
    elif model_type == "large":
        num_layers = 2
        batch_size = 20
        hidden_size = 1500
        num_steps = 35
        init_scale = 0.04
        max_grad_norm = 10.0
        epoch_start_decay = 14
        max_epoch = 55
        dropout = 0.65
        lr_decay = 1.0 / 1.15
        base_learning_rate = 1.0
    else:
        print("model type not supported")
        return

    # Training process
    loss, last_hidden, last_cell, feed_order = lm_model.lm_model(
        hidden_size,
        vocab_size,
        batch_size,
        num_layers=num_layers,
        num_steps=num_steps,
        init_scale=init_scale,
        dropout=dropout,
        rnn_model=rnn_model)
    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
        clip_norm=max_grad_norm))

    learning_rate = fluid.layers.create_global_var(name="learning_rate",
                                                   shape=[1],
                                                   value=1.0,
                                                   dtype='float32',
                                                   persistable=True)

    optimizer = fluid.optimizer.SGD(learning_rate=learning_rate)

    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    data_path = args.data_path
    print("begin to load data")
    raw_data = reader.ptb_raw_data(data_path)
    print("finished load data")
    train_data, valid_data, test_data, _ = raw_data

    def prepare_input(batch, init_hidden, init_cell, epoch_id=0, with_lr=True):
        x, y = batch
        new_lr = base_learning_rate * (lr_decay**max(
            epoch_id + 1 - epoch_start_decay, 0.0))
        lr = np.ones((1), dtype='float32') * new_lr
        res = {}
        x = x.reshape((-1, num_steps, 1))
        y = y.reshape((-1, 1))

        res['x'] = x
        res['y'] = y
        res['init_hidden'] = init_hidden
        res['init_cell'] = init_cell
        if with_lr:
            res['learning_rate'] = lr

        return res

    def eval(data):
        # note: evaluation reuses the training batch_size here (it is not set to 1)
        eval_data_iter = reader.get_data_iter(data, batch_size, num_steps)
        total_loss = 0.0
        iters = 0
        init_hidden = np.zeros((num_layers, batch_size, hidden_size),
                               dtype='float32')
        init_cell = np.zeros((num_layers, batch_size, hidden_size),
                             dtype='float32')
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden,
                                            init_cell,
                                            epoch_id=0,
                                            with_lr=False)
            fetch_outs = exe.run(
                inference_program,
                feed=input_data_feed,
                fetch_list=[loss.name, last_hidden.name, last_cell.name],
                use_program_cache=True)

            cost_eval = np.array(fetch_outs[0])
            init_hidden = np.array(fetch_outs[1])
            init_cell = np.array(fetch_outs[2])

            total_loss += cost_eval
            iters += num_steps

        ppl = np.exp(total_loss / iters)
        return ppl

    # get train epoch size
    batch_len = len(train_data) // batch_size
    epoch_size = (batch_len - 1) // num_steps
    log_interval = max(epoch_size // 10, 1)
    total_time = 0.0
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)
        train_data_iter = reader.get_data_iter(train_data, batch_size,
                                               num_steps)

        total_loss = 0
        iters = 0
        init_hidden = np.zeros((num_layers, batch_size, hidden_size),
                               dtype='float32')
        init_cell = np.zeros((num_layers, batch_size, hidden_size),
                             dtype='float32')
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden,
                                            init_cell,
                                            epoch_id=epoch_id)
            fetch_outs = exe.run(main_program,
                                 feed=input_data_feed,
                                 fetch_list=[
                                     loss.name, last_hidden.name,
                                     last_cell.name, 'learning_rate'
                                 ],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])
            init_hidden = np.array(fetch_outs[1])
            init_cell = np.array(fetch_outs[2])

            lr = np.array(fetch_outs[3])

            total_loss += cost_train
            iters += num_steps
            if batch_id > 0 and batch_id % log_interval == 0:
                ppl = np.exp(total_loss / iters)
                print("ppl ", batch_id, ppl[0], lr[0])

        ppl = np.exp(total_loss / iters)
        if epoch_id == 0 and ppl[0] > 1000:
            # with a bad random init, ppl is still above 1000 after the
            # first epoch and the run will not recover
            print("Parameters were badly initialized (ppl > 1000 after the first epoch); aborting.")
            return
        end_time = time.time()
        total_time += end_time - start_time
        print("train ppl", ppl[0])

        if epoch_id == max_epoch - 1 and args.enable_ce:
            card_num = get_cards()
            print("ptblm\tlstm_language_model_%s_duration_card%d\t%s" %
                  (args.rnn_model, card_num, total_time / max_epoch))
            print("ptblm\tlstm_language_model_%s_loss_card%d\t%s" %
                  (args.rnn_model, card_num, ppl[0]))

        model_path = os.path.join("model_new/", str(epoch_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(executor=exe,
                                   dirname=model_path,
                                   main_program=main_program)
        valid_ppl = eval(valid_data)
        print("valid ppl", valid_ppl[0])
    test_ppl = eval(test_data)
    print("test ppl", test_ppl[0])
Beispiel #27
0
def train():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    if args.enable_ce:
        fluid.default_main_program().random_seed = 102
        framework.default_startup_program().random_seed = 102

    # Training process

    if args.attention:
        model = AttentionModel(hidden_size,
                               src_vocab_size,
                               tar_vocab_size,
                               batch_size,
                               num_layers=num_layers,
                               init_scale=init_scale,
                               dropout=dropout)
    else:
        model = BaseModel(hidden_size,
                          src_vocab_size,
                          tar_vocab_size,
                          batch_size,
                          num_layers=num_layers,
                          init_scale=init_scale,
                          dropout=dropout)

    loss = model.build_graph()
    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    # clip gradients jointly by their global L2 norm before each update
    fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
        clip_norm=max_grad_norm))

    lr = args.learning_rate
    opt_type = args.optimizer
    if opt_type == "sgd":
        optimizer = fluid.optimizer.SGD(lr)
    elif opt_type == "adam":
        optimizer = fluid.optimizer.Adam(lr)
    else:
        print("only [sgd|adam] optimizers are supported")
        raise Exception("optimizer type not supported")

    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    train_data_prefix = args.train_data_prefix
    eval_data_prefix = args.eval_data_prefix
    test_data_prefix = args.test_data_prefix
    vocab_prefix = args.vocab_prefix
    src_lang = args.src_lang
    tar_lang = args.tar_lang
    print("begin to load data")
    raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                               train_data_prefix, eval_data_prefix,
                               test_data_prefix, args.max_len)
    print("finished load data")
    train_data, valid_data, test_data, _ = raw_data

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, tar_ids, tar_mask = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1], 1))
        in_tar = tar_ids[:, :-1]
        label_tar = tar_ids[:, 1:]

        in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1], 1))
        label_tar = label_tar.reshape(
            (label_tar.shape[0], label_tar.shape[1], 1))

        res['src'] = src_ids
        res['tar'] = in_tar
        res['label'] = label_tar
        res['src_sequence_length'] = src_mask
        res['tar_sequence_length'] = tar_mask

        return res, np.sum(tar_mask)

    def eval(data, epoch_id=0):
        eval_data_iter = reader.get_data_iter(data, batch_size, mode='eval')
        total_loss = 0.0
        word_count = 0.0
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed, word_num = prepare_input(batch,
                                                      epoch_id,
                                                      with_lr=False)
            fetch_outs = exe.run(inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])

            total_loss += cost_train * batch_size
            word_count += word_num

        ppl = np.exp(total_loss / word_count)

        return ppl

    ce_time = []
    ce_ppl = []
    max_epoch = args.max_epoch
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)
        if args.enable_ce:
            train_data_iter = reader.get_data_iter(train_data,
                                                   batch_size,
                                                   enable_ce=True)
        else:
            train_data_iter = reader.get_data_iter(train_data, batch_size)

        total_loss = 0
        word_count = 0.0
        for batch_id, batch in enumerate(train_data_iter):

            input_data_feed, word_num = prepare_input(batch, epoch_id=epoch_id)
            fetch_outs = exe.run(main_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])

            total_loss += cost_train * batch_size
            word_count += word_num

            if batch_id > 0 and batch_id % 100 == 0:
                print("ppl", batch_id, np.exp(total_loss / word_count))
                ce_ppl.append(np.exp(total_loss / word_count))
                total_loss = 0.0
                word_count = 0.0
        end_time = time.time()
        time_gap = end_time - start_time
        ce_time.append(time_gap)

        dir_name = os.path.join(args.model_path, "epoch_" + str(epoch_id))
        print("begin to save", dir_name)
        fluid.io.save_params(exe, dir_name)
        print("save finished")
        dev_ppl = eval(valid_data)
        print("dev ppl", dev_ppl)
        test_ppl = eval(test_data)
        print("test ppl", test_ppl)

    if args.enable_ce:
        card_num = get_cards()
        _ppl = 0
        _time = 0
        try:
            _time = ce_time[-1]
            _ppl = ce_ppl[-1]
        except IndexError:
            print("ce info error: no timing/ppl records were collected")
        print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
        print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))
Beispiel #28
0
def train():
    def prepare_input(batch):
        src_ids, label = batch
        res = {}

        res['src'] = src_ids
        res['label'] = label

        return res

    # Set parameters:
    # ngram_range = 2 will add bi-grams features
    ngram_range = 2
    max_features = 20000
    maxlen = 400
    batch_size = 32
    embedding_dims = 50
    epochs = 5

    print('Loading data...')
    all_data = reader.raw_data(num_words=max_features)
    x_train, y_train, x_test, y_test = all_data

    print(len(x_train), 'train sequences')
    print(len(x_test), 'test sequences')
    print('Average train sequence length: {}'.format(
        np.mean(list(map(len, x_train)), dtype=int)))
    print('Average test sequence length: {}'.format(
        np.mean(list(map(len, x_test)), dtype=int)))

    if ngram_range > 1:
        print('Adding {}-gram features'.format(ngram_range))
        # Create set of unique n-gram from the training set.
        ngram_set = set()
        for input_list in x_train:
            for i in range(2, ngram_range + 1):
                set_of_ngram = create_ngram_set(input_list, ngram_value=i)
                ngram_set.update(set_of_ngram)

        # Dictionary mapping n-gram token to a unique integer.
        # Integer values are greater than max_features in order
        # to avoid collision with existing features.
        start_index = max_features + 1
        token_indice = {v: k + start_index for k, v in enumerate(ngram_set)}
        indice_token = {token_indice[k]: k for k in token_indice}

        # max_features is the highest integer that could be found in the dataset.
        max_features = np.max(list(indice_token.keys())) + 1

        # Augmenting x_train and x_test with n-grams features
        x_train = add_ngram(x_train, token_indice, ngram_range)
        x_test = add_ngram(x_test, token_indice, ngram_range)
        print('Average train sequence length: {}'.format(
            np.mean(list(map(len, x_train)), dtype=int)))
        print('Average test sequence length: {}'.format(
            np.mean(list(map(len, x_test)), dtype=int)))

    print('Pad sequences (samples x time)')
    x_train = reader.pad_sequences(x_train, maxlen=maxlen)
    x_test = reader.pad_sequences(x_test, maxlen=maxlen)
    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    all_data = x_train, y_train, x_test, y_test

    print('Build model...')
    model = BaseModel(max_features=max_features)
    loss, acc = model.build_graph()

    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(0.01)
    optimizer.minimize(loss)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    for epoch_id in range(epochs):
        start_time = time.time()
        print("epoch id", epoch_id)

        train_data_iter = reader.get_data_iter(all_data, batch_size)

        total_loss = 0
        total_acc = 0
        batch_id = 0
        for batch in train_data_iter:

            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])
            acc_train = np.array(fetch_outs[1])
            total_loss += cost_train
            total_acc += acc_train

            if batch_id > 0 and batch_id % 10 == 0:
                print("avg loss: %.3f, avg acc: %.3f over last 10 steps (step %d)" %
                      (total_loss / 10, total_acc / 10, batch_id))
                total_loss = 0.0
                total_acc = 0.0

            batch_id += 1

    test_data_iter = reader.get_data_iter(all_data, batch_size, mode='test')

    all_acc = []

    for batch in test_data_iter:
        input_data_feed = prepare_input(batch)
        fetch_outs = exe.run(program=inference_program,
                             feed=input_data_feed,
                             fetch_list=[loss.name, acc.name],
                             use_program_cache=False)

        all_acc.append(fetch_outs[1])

    all_acc = np.array(all_acc).astype("float32")

    print("test acc: %.3f" % all_acc.mean())
Beispiel #29
0
    def train():
        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            start_time = time.time()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size,
                                                       enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            time_interval = 0.0
            batch_start_time = time.time()
            epoch_word_count = 0.0
            total_reader_cost = 0.0
            batch_read_start = time.time()
            for batch_id, batch in enumerate(train_data_iter):
                input_data_feed, word_num = prepare_input(batch,
                                                          epoch_id=epoch_id)
                word_count += word_num
                total_reader_cost += time.time() - batch_read_start
                fetch_outs = exe.run(program=CompiledProgram,
                                     feed=input_data_feed,
                                     fetch_list=[loss.name],
                                     use_program_cache=True)

                cost_train = np.mean(fetch_outs[0])
                total_loss += cost_train * batch_size
                batch_end_time = time.time()
                batch_time = batch_end_time - batch_start_time
                batch_times.append(batch_time)
                time_interval += batch_time
                epoch_word_count += word_num

                if batch_id > 0 and batch_id % 100 == 0:
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f; reader cost: %0.5f s; ips: %0.5f tokens/sec"
                        % (epoch_id, batch_id, batch_time,
                           np.exp(total_loss / word_count), total_reader_cost /
                           100, word_count / time_interval))
                    ce_ppl.append(np.exp(total_loss / word_count))
                    total_loss = 0.0
                    word_count = 0.0
                    time_interval = 0.0
                    total_reader_cost = 0.0

                # profiler tools
                if args.profile and epoch_id == 0 and batch_id == 100:
                    profiler.reset_profiler()
                elif args.profile and epoch_id == 0 and batch_id == 105:
                    return
                batch_start_time = time.time()
                batch_read_start = time.time()

            end_time = time.time()
            epoch_time = end_time - start_time
            ce_time.append(epoch_time)
            print(
                "\nTrain epoch:[%d]; Epoch Time: %.5f; avg_time: %.5f s/step; ips: %0.5f tokens/sec\n"
                % (epoch_id, epoch_time, sum(batch_times) / len(batch_times),
                   epoch_word_count / sum(batch_times)))

            if not args.profile:
                save_path = os.path.join(args.model_path,
                                         "epoch_" + str(epoch_id),
                                         "checkpoint")
                print("begin to save", save_path)
                fluid.save(train_program, save_path)
                print("save finished")
                dev_ppl = eval(valid_data)
                print("dev ppl", dev_ppl)
                test_ppl = eval(test_data)
                print("test ppl", test_ppl)

        if args.enable_ce:
            card_num = get_cards()
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))
Beispiel #30
0
def train():
    raw_data, raw_data_test = reader.get_lt5_data()

    model = BaseModel(fine_tune=False)
    loss, acc, output = model.build_graph()

    main_program = fluid.default_main_program()
    test_program = main_program.clone(for_test=True)

    optimizer = fluid.optimizer.Adadelta(0.01)
    optimizer.minimize(loss)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    def prepare_input(batch, epoch_id=0):
        x, y = batch
        res = {}

        res['img'] = np.array(x).astype("float32") / 255
        res['label'] = np.array(y).astype("int64")

        return res

    def train_test(test_batch):
        input_data_feed = prepare_input(test_batch)
        fetch_outs = exe.run(program=test_program,
                             feed=input_data_feed,
                             fetch_list=[acc.name],
                             use_program_cache=True)

        acc_test = np.array(fetch_outs[0])
        print("test batch acc: {0:.2%}".format(np.mean(acc_test)))

    for epoch_id in range(epochs):
        print("epoch id", epoch_id)

        train_data_iter = reader.get_data_iter(raw_data, batch_size)
        test_data_iter = reader.get_data_iter(raw_data_test, batch_size)

        data_iter = zip(train_data_iter, test_data_iter)

        total_loss = 0
        total_acc = []
        for batch_id, batch in enumerate(data_iter):
            batch_train, batch_test = batch
            input_data_feed = prepare_input(batch_train)
            fetch_outs = exe.run(program=main_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])
            acc_train = np.array(fetch_outs[1])
            total_loss += cost_train * batch_size
            total_acc.append(acc_train)

        print("train total loss: ", total_loss, np.mean(total_acc))
        train_test(batch_test)
        print()

    shutil.rmtree(temp_model_path, ignore_errors=True)
    os.makedirs(temp_model_path)
    fluid.io.save_params(executor=exe, dirname=temp_model_path)
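
To reuse the parameters persisted above, the same graph can be rebuilt and the parameters restored before running the test program; a minimal sketch, assuming the exe, main_program, and temp_model_path from the example are still in scope:

fluid.io.load_params(executor=exe, dirname=temp_model_path,
                     main_program=main_program)
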