Example #1
0
 def _save_checkpoint(self, episode: int, score: float):
     """Save the network's state dict under ``./output/<get_run_id()>/``.

     The file is named ``episode<NNNN>_score<S.SS>.weights``, where the
     episode number is zero-padded to four digits and the score is
     formatted with two decimals.

     Args:
         episode: Episode number embedded in the file name.
         score: Score embedded in the file name.
     """
     checkpoint_dir = os.path.join(os.curdir, 'output', get_run_id())
     # exist_ok=True replaces the former exists()/makedirs() pair, which
     # could raise FileExistsError if the directory appeared between the
     # check and the creation.
     os.makedirs(checkpoint_dir, exist_ok=True)
     file_name = f'episode{episode:04d}_score{score:04.2f}.weights'
     torch.save(self._network.state_dict(),
                os.path.join(checkpoint_dir, file_name))
Example #2
0
            os.makedirs(checkpoint_dir)
        torch.save(
            self._network.state_dict(),
            os.path.join(checkpoint_dir,
                         f'episode{episode:04d}_score{score:04.2f}.weights'))

    def _load_checkpoint(self, checkpoint: str):
        self._network.load_state_dict(torch.load(checkpoint))
        self._network.eval()


# Script entry point: create the Unity environment, a TensorBoard logger and
# a DQN agent configured with the hyperparameters listed below.
if __name__ == '__main__':
    # The environment binary is resolved relative to the current directory.
    env = UnityEnvironment(
        file_name=os.path.join(os.curdir, 'bin', 'Banana.x86_64'))
    # NOTE(review): the logger path is built from the parent directory
    # (os.pardir), whereas the environment binary above is resolved from
    # os.curdir -- confirm this difference is intentional.
    tb = init_tensorboard_logger(os.path.join(os.pardir, 'tensorboard'),
                                 get_run_id())
    # NOTE(review): device is hard-coded to 'cuda'; presumably this fails on
    # machines without a CUDA device -- confirm against DQN's handling.
    algo = DQN(env=env,
               state_space=37,
               action_space=4,
               network_builder=DeepQNetwork,
               use_double_dqn=True,
               gamma=0.99,
               batch_size=64,
               target_update=1000,
               use_soft_updates=True,
               policy_update=4,
               min_samples_in_memory=64,
               memory_capacity=100000,
               eps_fn=linear_decay(1.0, 0.05, 100000),
               device='cuda',
               tb_logger=tb)