Beispiel #1
0
# Plain SGD on `loss`. The executor is asked for both the loss node and the
# update node, so a single run() call yields the loss value and applies a step.
opt = optimizer.SGDOptimizer(hp.lr)
train_op = opt.minimize(loss)
executor = ad.Executor([loss, train_op], ctx=ctx)

logging.info("# Session")

for ep in range(hp.num_epochs):
    dataloader.make_epoch_data(hp.batch_size)
    for i in tqdm(range(dataloader.batch_num)):
        xs_val, ys_val = dataloader.get_batch()
        # Only element 0 of each returned container is used.
        xs_val = xs_val[0]
        targets = ys_val[0]
        # Shifted views of the targets: ys1 drops the last column, ys2 drops
        # the first one.
        ys1_val, ys2_val = targets[:, :-1], targets[:, 1:]
        # Float mask: 1.0 where ys2 differs from the pad id, 0.0 elsewhere.
        pad_id = dataloader.get_pad()
        nonpadding_val = np.not_equal(ys2_val, pad_id).astype(np.float32)

        feed = {
            xs: xs_val,
            ys1: ys1_val,
            ys2: ys2_val,
            nonpadding: nonpadding_val,
        }
        # The second result belongs to train_op and is not needed.
        _loss, _ = executor.run(feed_dict=feed)

        print('Iteration %d, loss %f' % (i, _loss.asnumpy()))

logging.info("Done")
Beispiel #2
0
# Parse hyperparameters from the command line and echo them.
# NOTE: the hyperparameters are not written to hp.logdir here.
parser = hparams.parser
hp = parser.parse_args()
print(hp)

logging.info("# Prepare train/eval batches")
dataloader = DataLoader(hp.train1, hp.train2, hp.maxlen1, hp.maxlen2, hp.vocab)

# Fixed-shape int32 inputs: xs is [16, 100]; ys1 and ys2 are both [16, 99].
xs = tf.placeholder(dtype=tf.int32, shape=[16, 100], name='xs')
ys1 = tf.placeholder(dtype=tf.int32, shape=[16, 99], name='ys1')
ys2 = tf.placeholder(dtype=tf.int32, shape=[16, 99], name='ys2')

logging.info("# Load model")
m = Transformer(hp)
raw_loss = m.train(xs, (ys1, ys2))

# Mask out positions where ys2 equals the pad id, then divide the masked sum
# by the number of unmasked positions (the epsilon guards against dividing by
# zero when everything is padding).
pad_id = dataloader.get_pad()
nonpadding = tf.to_float(tf.not_equal(ys2, pad_id))  # 0: <pad>
masked_total = tf.reduce_sum(raw_loss * nonpadding)
nonpad_count = tf.reduce_sum(nonpadding) + 1e-7
loss = masked_total / nonpad_count

# Vanilla gradient descent; minimize() also increments global_step.
global_step = tf.train.get_or_create_global_step()
optimizer = tf.train.GradientDescentOptimizer(learning_rate=hp.lr)
train_op = optimizer.minimize(loss, global_step=global_step)

logging.info("# Session")
# Retain up to one checkpoint per epoch.
saver = tf.train.Saver(max_to_keep=hp.num_epochs)
# Open a session and look for the most recent checkpoint under hp.logdir.
with tf.Session() as sess:
    ckpt = tf.train.latest_checkpoint(hp.logdir)
    # latest_checkpoint returns None when no checkpoint is found; in that
    # case all global variables are initialized fresh.
    if ckpt is None:
        logging.info("Initializing from scratch")
        sess.run(tf.global_variables_initializer())