# Reinforcement (adversarial) training loop, variant 1.
# Maintains a "rollout" copy of the generator whose parameters trail the live
# generator via an exponential moving average (rollout_update_ratio).
if pool:
    # Rollout rewards are computed in worker processes -> keep its params on CPU.
    rollout_generator.to_cpu()
# Snapshot of the rollout generator's parameter arrays; updated in place below.
rollout_params = np.asanyarray(tuple(param.data for param in rollout_generator.params()))
print('#########################################################################')
print('Start Reinforcement Training ...')
for epoch in range(1, total_epoch):
    print('total batch: ', epoch)

    # --- g-steps: policy-gradient updates of the generator ---
    for step in range(g_steps):
        samples = generator.generate(gen_batch_size, train=True, random_input=True)
        # Monte-Carlo rollouts scored by the discriminator give per-sample rewards.
        rewards = rollout_generator.get_rewards(samples, discriminator, rollout_num=16,
                                                pool=pool, gpu=args.gpu)
        print(rewards[:30])
        loss = generator.reinforcement_step(samples, rewards, g_steps=g_steps,
                                            random_input=True)
        gen_optimizer.zero_grads()
        loss.backward()
        gen_optimizer.update()
        print(' Reinforce step {}/{}'.format(step+1, g_steps))

    # --- move rollout params toward the freshly-updated generator (EMA) ---
    for i, param in enumerate(generator.params()):
        if pool:
            # Generator may live on GPU; rollout copy is on CPU (see above).
            rollout_params[i] += rollout_update_ratio * (cuda.to_cpu(param.data) - rollout_params[i])
        else:
            rollout_params[i] += rollout_update_ratio * (param.data - rollout_params[i])

    # --- d-steps: refresh negative samples and train the discriminator ---
    for step in range(d_steps):
        # generate for discriminator
        generate_samples_neg(generator, gen_batch_size, generated_num, negative_file)
# Reinforcement (adversarial) training loop, variant 2: adds wall-clock timing
# and a per-step progress line on stderr, with a mean-time summary after the
# g-step loop completes (for/else).
# Snapshot of the rollout generator's parameter arrays.
rollout_params = np.asanyarray(tuple(param.data for param in rollout_generator.params()))
print('#########################################################################\n')
print('Start Reinforcement Training ...')
start = time.time()
for epoch in range(1, args.total_epoch):
    print('total epoch ', epoch)
    tmp = time.time()
    # g-step
    mean_time = 0
    for step in range(args.g_steps):
        samples = generator.generate(batch_size, train=True)
        # Monte-Carlo rollouts scored by the discriminator give per-sample rewards.
        rewards = rollout_generator.get_rewards(samples, discriminator,
                                                pool=pool, gpu=args.gpu)
        loss = generator.reinforcement_step(samples, rewards, g_steps=args.g_steps)
        generator.cleargrads()
        loss.backward()
        gen_optimizer.update()
        duration = time.time() - start   # total elapsed since training began
        step_time = time.time() - tmp    # elapsed for this single step
        mean_time += step_time
        tmp = time.time()
        sys.stderr.write('\rreinforce step {}/{} time: {} ({:.2f} sec/step)'.format(
            step + 1, args.g_steps,
            str(datetime.timedelta(seconds=duration)).split('.')[0], step_time))
    else:
        # Loop ran to completion: print the summary with the mean step time.
        print('\rreinforce step {}/{} time: {} ({:.2f} sec/step)'.format(
            step + 1, args.g_steps,
            str(datetime.timedelta(seconds=duration)).split('.')[0],
            mean_time / args.g_steps))
    # update rollout generator