import copy
import os

import numpy as np
from chainer import cuda, serializers

# (A TensorFlow session `sess`, the loss summary ops, and `summary_writer`
# are assumed to have been created earlier in the script.)

# Rollout generator: a frozen copy of the generator used for Monte Carlo
# reward estimation during SeqGAN-style policy-gradient training.
rollout_generator = copy.deepcopy(generator)
if pool:
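    # With a multiprocessing pool, rollouts run in worker processes, so keep
    # this copy on the CPU.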
    rollout_generator.to_cpu()
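# Snapshot the rollout generator's parameter arrays (an object array of
# per-parameter views); in-place updates to these views later in the loop
# also update the rollout generator's weights.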
rollout_params = np.asanyarray(tuple(param.data for param in rollout_generator.params()))

print('#########################################################################')
print('Start Reinforcement Training ...')

for epoch in range(1, total_epoch):

    print('epoch:', epoch)

    # G-steps: sample sequences from the current generator, estimate rewards,
    # and update the generator by REINFORCE
    # (loss ~ -sum_t log pi(y_t | y_<t) * R_t).
    for step in range(g_steps):
        samples = generator.generate(gen_batch_size, train=True, random_input=True)
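        # Rewards are estimated from rollout_num Monte Carlo completions per
        # sample, scored by the discriminator.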
        rewards = rollout_generator.get_rewards(samples, discriminator, rollout_num=16, pool=pool, gpu=args.gpu)
        print(rewards[:30])
        loss = generator.reinforcement_step(samples, rewards, g_steps=g_steps, random_input=True)
        gen_optimizer.zero_grads()
        loss.backward()
        gen_optimizer.update()
        print(' Reinforce step {}/{}'.format(step+1, g_steps))

    # Soft-update the rollout generator toward the live generator:
    #     theta_roll += rho * (theta_gen - theta_roll),
    # an exponential moving average with rate rollout_update_ratio.
    for i, param in enumerate(generator.params()):
        if pool:
            rollout_params[i] += rollout_update_ratio * (cuda.to_cpu(param.data) - rollout_params[i])
        else:
            rollout_params[i] += rollout_update_ratio * (param.data - rollout_params[i])

    for step in range(d_steps):
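        # Note: this loop only monitors the generator's MLE loss on held-out
        # data; `pre_train_loss` below still holds values from the earlier
        # pretraining phase.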
        # test
        test_loss = []
        for _ in range(test_num // batch_size):
            batch = arasuji.get_test_data(batch_size)
            g_loss = generator.pretrain_step(batch)
            test_loss.append(float(g_loss.data))
        test_count += 1

        print('\npre-train epoch {}  train_loss {}  test_loss {}'.format(epoch, np.mean(pre_train_loss),
                                                                         np.mean(test_loss)))
        summary = sess.run(train_loss_summary, feed_dict={loss_: np.mean(pre_train_loss)})
        summary_writer.add_summary(summary, test_count)
        summary = sess.run(test_loss_summary, feed_dict={loss_: np.mean(test_loss)})
        summary_writer.add_summary(summary, test_count)
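        # Dump a few generated samples for qualitative inspection.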
        samples = generator.generate(10, train=False)
        with open(os.path.join(out_dir, "generated_sample_pretrain.txt"), 'a', encoding='utf-8') as f:
            f.write('\npre-train epoch {}  train_loss {} test_loss {} \n'.format(epoch, np.mean(pre_train_loss),
                                                                                 np.mean(test_loss)))
            for x in samples:
                f.write(''.join([arasuji.vocab[w] for w in x]) + '\n')

    serializers.save_hdf5(os.path.join(out_dir, "models", "gen_pretrain.model"), generator)
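
    # NOTE: the d_steps loop above never updates the discriminator. In a
    # SeqGAN-style setup it would normally be trained here on real vs.
    # generated sequences. A minimal sketch, assuming a hypothetical
    # `discriminator.train_step(positive, negative)` returning a Chainer
    # loss, a `dis_optimizer`, and an `arasuji.get_train_data` helper (none
    # of which appear in this fragment):
    #
    #     for step in range(d_steps):
    #         negative = generator.generate(gen_batch_size, train=False)
    #         positive = arasuji.get_train_data(gen_batch_size)
    #         d_loss = discriminator.train_step(positive, negative)
    #         dis_optimizer.zero_grads()
    #         d_loss.backward()
    #         dis_optimizer.update()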

# NOTE: the branch this `else` originally paired with is not part of this
# fragment.
else:
    # test
    test_loss = []
    perm = np.random.permutation(test_num)
    for i in range(0, test_num, batch_size):
        batch = test_comment_data[perm[i:i + batch_size]]
        tag_batch = test_tag_data[perm[i:i + batch_size]]

        g_loss = generator.pretrain_step(batch, tag_batch)
        test_loss.append(float(g_loss.data))
    test_count += 1

        print('\npre-train epoch {}  train_loss {}  test_loss {}'.format(epoch, np.mean(pre_train_loss),
                                                                         np.mean(test_loss)))
        summary = sess.run(train_loss_summary, feed_dict={loss_: np.mean(pre_train_loss)})
        summary_writer.add_summary(summary, test_count)
        summary = sess.run(test_loss_summary, feed_dict={loss_: np.mean(test_loss)})
        summary_writer.add_summary(summary, test_count)
        samples = generator.generate(10, train=False)
        with open(os.path.join(out_dir, "generated_sample_pretrain.txt"), 'a', encoding='utf-8') as f:
            f.write('\npre-train epoch {}  train_loss {} test_loss {} \n'.format(epoch, np.mean(pre_train_loss),
                                                                                 np.mean(test_loss)))
            for x in samples:
                f.write(''.join([vocab[w] for w in x]) + '\n')

    # `epoch % 1 == 0` is always true, so a checkpoint is saved every epoch;
    # raise the modulus to save less often.
    if epoch % 1 == 0:
        serializers.save_hdf5(os.path.join(out_dir, "models", "gen_pretrain_{}.model".format(epoch)), generator)
    batch_size //= 20  # integer division keeps batch_size usable as a range() step
# Example 4: a variant of the evaluation code that also tracks VAE-style
# G/KL losses. The fragment below begins midway through its per-epoch
# summary-writing block.
            # The statement that produced `summary` here was cut off above;
            # by symmetry with the test-side block below it was presumably
            #     summary = sess.run(train_g_loss_summary,
            #                        feed_dict={loss_: np.mean(sum_g_loss)})
            summary_writer.add_summary(summary, test_count)
            summary = sess.run(train_kl_loss_summary,
                               feed_dict={loss_: np.mean(sum_kl_loss)})
            summary_writer.add_summary(summary, test_count)

            # Write the corresponding summaries for the test split.
            summary = sess.run(test_loss_summary,
                               feed_dict={loss_: np.mean(test_loss)})
            summary_writer.add_summary(summary, test_count)
            summary = sess.run(test_g_loss_summary,
                               feed_dict={loss_: np.mean(sum_test_g_loss)})
            summary_writer.add_summary(summary, test_count)
            summary = sess.run(test_kl_loss_summary,
                               feed_dict={loss_: np.mean(sum_test_kl_loss)})
            summary_writer.add_summary(summary, test_count)

            samples = generator.generate(10, train=False)

            with open(os.path.join(out_dir, "generated_sample_pretrain.txt"),
                      'a',
                      encoding='utf-8') as f:
                f.write('\npre-train epoch {}  train_loss {} test_loss {} \n'.
                        format(epoch, np.mean(pre_train_loss),
                               np.mean(test_loss)))
                for x in samples:
                    f.write(''.join([vocab[w] for w in x]) + '\n')
        else:  # lighter logging when the full summary pass is skipped
            print('\npre-train epoch {}  train_loss {}  test_loss {}'.format(
                epoch, np.mean(pre_train_loss), np.mean(test_loss)))
            summary = sess.run(train_loss_summary,
                               feed_dict={loss_: np.mean(pre_train_loss)})
            summary_writer.add_summary(summary, test_count)