def _run_train_epoch(trainer, epoch, num_episodes, save_path):
    """Run ``num_episodes`` single-episode rollouts and log each one's duration.

    Every iteration logs an episode banner, calls
    ``trainer.run(num_episodes=1, train_steps=0)``, and then logs the
    wall-clock time that iteration took.

    Args:
        trainer: Object exposing ``run(num_episodes=..., train_steps=...)``.
        epoch: Epoch number; only used in the log banner.
        num_episodes: Number of episodes to run; episodes are numbered from 1
            in the log output.
        save_path: Accepted but not used by this function.
    """
    # NOTE(review): another `_run_train_epoch` with a different signature is
    # visible in this source; if both live in the same module the later
    # definition shadows this one -- confirm which variant is intended.
    for episode_no in six.moves.range(1, num_episodes + 1):
        started_at = time.time()
        _LOG.info("----- EPISODE: %d/%d [EPOCH: %d]",
                  episode_no, num_episodes, epoch)

        # One episode per call, with zero train steps requested.
        trainer.run(num_episodes=1, train_steps=0)

        elapsed = time.time() - started_at
        _LOG.info("Elapsed: %.2fs", elapsed)
def _run_train_epoch(trainer, epoch, num_cycles, num_episodes, save_path):
    """Run ``num_cycles`` training cycles, then save the agent.

    Each cycle logs a banner, calls ``trainer.run`` with ``num_episodes``
    episodes and ``train_steps`` set to
    ``trainer.agent.env.spec.max_episode_steps``, and logs the wall-clock
    time the cycle took. Afterwards the agent is saved to ``save_path`` with
    ``replay_buffer=True`` and the save duration is logged.

    Args:
        trainer: Object exposing ``run(num_episodes=..., train_steps=...)``
            and an ``agent`` attribute with ``env.spec.max_episode_steps``
            and ``save(path, replay_buffer=...)``.
        epoch: Epoch number; only used in the log banner.
        num_cycles: Number of cycles to run; cycles are numbered from 1 in
            the log output.
        num_episodes: Episodes passed to ``trainer.run`` on every cycle.
        save_path: Destination handed to ``trainer.agent.save``.
    """
    for cycle_no in six.moves.range(1, num_cycles + 1):
        cycle_began = time.time()
        _LOG.info("----- CYCLE: %d/%d [EPOCH: %d]", cycle_no, num_cycles, epoch)

        _LOG.info("Running %d episodes", num_episodes)
        # Looked up on every cycle (not hoisted) so that nothing is read from
        # the trainer when num_cycles is 0.
        step_budget = trainer.agent.env.spec.max_episode_steps
        trainer.run(num_episodes=num_episodes, train_steps=step_budget)

        cycle_elapsed = time.time() - cycle_began
        _LOG.info("Elapsed: %.2fs", cycle_elapsed)

    # NOTE(review): the original layout of this function was lost; the save is
    # placed after the cycle loop (once per call) because the per-cycle
    # "Elapsed" log precedes it -- confirm it was not meant to run every cycle.
    save_began = time.time()
    trainer.agent.save(save_path, replay_buffer=True)
    save_elapsed = time.time() - save_began
    _LOG.info("Agent saved [%.2fs]", save_elapsed)