gen_optimizer.add_hook(chainer.optimizer.GradientClipping(gen_grad_clip))

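# Discriminator optimizer: Adam, with L2 weight decay applied only to the
# output layer via the NamedWeightDecay hook below.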
dis_optimizer = optimizers.Adam(alpha=1e-4)
dis_optimizer.setup(discriminator)
dis_optimizer.add_hook(NamedWeightDecay(dis_l2_reg_lambda, '/out/'))
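# NamedWeightDecay is assumed to be a project-local hook that applies weight
# decay only to parameters whose path contains the given substring ('/out/',
# i.e. the discriminator's output layer). A minimal sketch under that assumption:
#
#     class NamedWeightDecay(object):
#         name = 'NamedWeightDecay'
#
#         def __init__(self, rate, target_name):
#             self.rate = rate
#             self.target_name = target_name
#
#         def __call__(self, opt):
#             for path, param in opt.target.namedparams():
#                 if self.target_name in path and param.grad is not None:
#                     param.grad += self.rate * param.data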

if not args.gen:
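    # no pretrained generator snapshot was supplied (args.gen is assumed to be
    # a checkpoint path), so run MLE pretraining from scratch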

    print('Start pre-training generator...')

    for epoch in range(gen_pretrain_epoch):

        pre_train_loss = []
        for _ in range(gen_data_loader.num_batch):
            batch = gen_data_loader.next_batch()
            g_loss = generator.pretrain_step(batch)
            generator.cleargrads()  # reset gradients before backprop; Optimizer.zero_grads() is deprecated
            g_loss.backward()
            gen_optimizer.update()
            pre_train_loss.append(float(g_loss.data))

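        # Oracle evaluation: sample sequences into eval_file, then score their
        # negative log-likelihood under the target LSTM oracle (this assumes the
        # TF session `sess` and `target_lstm` from the SeqGAN setup are in scope).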
        generate_samples_neg(generator, gen_batch_size, 1000, eval_file)
        likelihood_data_loader.create_batches(eval_file)
        test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
        print('pre-train epoch: {}  test loss: {}  lr: {}'.format(epoch, test_loss, gen_optimizer.lr))
        test_count += 1
        summary = sess.run(target_loss_summary, feed_dict={loss_: test_loss})
        summary_writer.add_summary(summary, test_count)
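        # A minimal sketch of `target_loss`, assuming it follows the original
        # SeqGAN implementation (mean oracle NLL over the evaluation batches):
        #
        #     def target_loss(sess, target_lstm, data_loader):
        #         nll = []
        #         data_loader.reset_pointer()
        #         for _ in range(data_loader.num_batch):
        #             batch = data_loader.next_batch()
        #             nll.append(sess.run(target_lstm.pretrain_loss,
        #                                 {target_lstm.x: batch}))
        #         return np.mean(nll)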
        # second pretraining pass: MLE over the real comment corpus in shuffled minibatches
        pre_train_loss = []
        perm = np.random.permutation(train_num)
        for i in range(0, train_num, batch_size):
            batch = train_comment_data[perm[i:i + batch_size]]
            tag_batch = train_tag_data[perm[i:i + batch_size]]

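            # Two pretraining modes: reconstruct the comment through an
            # encoder-decoder (autoencoder pretraining), or train the decoder
            # by MLE directly on (comment, tag) pairs.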
            if args.ae_pretrain:
                g_loss = generator.pretrain_step_autoencoder(batch)
                # zero_grads() was removed from newer Chainer; clear via the links
                enc_optimizer.target.cleargrads()  # clear the encoder's gradients
                generator.cleargrads()
                g_loss.backward()
                enc_optimizer.update()
                gen_optimizer.update()
                pre_train_loss.append(float(g_loss.data))
            else:
                g_loss = generator.pretrain_step(batch, tag_batch)
                generator.cleargrads()
                g_loss.backward()
                gen_optimizer.update()
                pre_train_loss.append(float(g_loss.data))

            # progress report
            gen_train_count += 1
            progress_report(gen_train_count, start, batch_size)

        # evaluate pretraining loss on the held-out test comments
        test_loss = []
        perm = np.random.permutation(test_num)

        for i in range(0, test_num, batch_size):
            batch = test_comment_data[perm[i:i + batch_size]]