def _save_checkpoint(self, episode: int, score: float) -> None:
    """Write the network's current weights to a per-run checkpoint file.

    The file is stored as
    ``./output/<run id>/episode<episode>_score<score>.weights``, where the
    run id is whatever ``get_run_id()`` returns at call time.

    Args:
        episode: Episode number; zero-padded to four digits in the file name.
        score: Score embedded in the file name with two decimal places.
    """
    checkpoint_dir = os.path.join(os.curdir, 'output', get_run_id())
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the same directory.
    os.makedirs(checkpoint_dir, exist_ok=True)

    # NOTE(review): the width 4 in ``04.2f`` never pads, because two decimals
    # already yield at least four characters. Kept as-is so existing
    # checkpoint file names stay stable.
    filename = f'episode{episode:04d}_score{score:04.2f}.weights'
    torch.save(self._network.state_dict(),
               os.path.join(checkpoint_dir, filename))
def _load_checkpoint(self, checkpoint: str) -> None:
    """Restore network weights from a checkpoint file and switch to eval mode.

    Args:
        checkpoint: Path to a file holding a state dict written by
            ``torch.save``.
    """
    # Deserialize onto the CPU so that a checkpoint written from a CUDA run
    # can still be read on a machine without a GPU. load_state_dict() then
    # copies the values into the network's existing parameters, on whatever
    # device those parameters already live.
    state_dict = torch.load(checkpoint, map_location='cpu')
    self._network.load_state_dict(state_dict)
    self._network.eval()


if __name__ == '__main__':
    # Unity environment binary, looked up relative to the working directory.
    env = UnityEnvironment(
        file_name=os.path.join(os.curdir, 'bin', 'Banana.x86_64'))

    # TensorBoard logger rooted in the parent directory's 'tensorboard'
    # folder and tagged with this run's id.
    tb = init_tensorboard_logger(
        os.path.join(os.pardir, 'tensorboard'), get_run_id())

    # NOTE(review): state_space=37 / action_space=4 presumably match the
    # Banana environment's observation and action sizes -- confirm against
    # the environment's brain parameters.
    algo = DQN(
        env=env,
        state_space=37,
        action_space=4,
        network_builder=DeepQNetwork,
        use_double_dqn=True,
        gamma=0.99,
        batch_size=64,
        target_update=1000,
        use_soft_updates=True,
        policy_update=4,
        min_samples_in_memory=64,
        memory_capacity=100000,
        eps_fn=linear_decay(1.0, 0.05, 100000),
        device='cuda',
        tb_logger=tb)