コード例 #1
0
    def save(self, directory: Optional[str] = None,
             callbacks: Union[List[core.AgentCallback], core.AgentCallback, None] = None) -> str:
        """Persists the agent definition and the currently trained policy.

        The agent definition is dumped as json into the target directory, the policy itself is
        written by the backend agent into a subdirectory of the target directory.

        Args:
            directory: the directory to save to; it is passed through bcore._mkdir before use.
                If None, a path obtained from bcore._get_temp_path() is used instead.
            callbacks: a single callback, a list of callbacks or None; converted to a list and
                forwarded to the backend agent's save call.

        Returns:
            the directory path as returned by bcore._mkdir.

        Raises:
            AssertionError: if the directory is empty or if no trained policy is available yet.
        """
        target = bcore._get_temp_path() if directory is None else directory
        assert target
        assert self._backend_agent._agent_context._is_policy_trained, \
            "No trained policy available. Call train() first."

        target = bcore._mkdir(target)

        # the agent definition is written first, next to the policy subdirectory
        with open(os.path.join(target, EasyAgent._KEY_EASYAGENT_FILENAME), 'w') as definition_file:
            json.dump(self._to_dict(), definition_file, sort_keys=True, indent=2)

        callback_list = self._to_callback_list(callbacks=callbacks)
        policy_target = bcore._mkdir(os.path.join(target, EasyAgent._KEY_POLICY_DIRECTORY))
        self._backend_agent.save(directory=policy_target, callbacks=callback_list)
        return target
コード例 #2
0
 def test_save_load(self):
     """Saves a random agent into a fresh temp directory and loads it back from there."""
     model_config = core.ModelConfig(_lineworld_name)
     random_agent = tfagents.TfRandomAgent(model_config=model_config)
     tempdir = bcore._get_temp_path()
     bcore._mkdir(tempdir)
     try:
         random_agent.save(directory=tempdir, callbacks=[])
         random_agent.load(directory=tempdir, callbacks=[])
     finally:
         # remove the temp directory even if save or load fails, so a failing test leaves no residue
         bcore._rmpath(tempdir)
コード例 #3
0
    def test_save_(self):
        """Trains a tensorforce ppo agent for a few iterations, saves it and loads it into a new agent."""
        from easyagents.backends import tforce

        model_config = core.ModelConfig(_cartpole_name)
        tc = core.PpoTrainContext()
        tc.num_iterations = 3
        ppo_agent = tforce.TforcePpoAgent(model_config=model_config)
        ppo_agent.train(train_context=tc, callbacks=[log.Iteration(), log.Agent()])
        tempdir = bcore._get_temp_path()
        bcore._mkdir(tempdir)
        try:
            ppo_agent.save(tempdir, [])

            # load into a separate instance to check the saved state is usable on its own
            loaded_agent = tforce.TforcePpoAgent(model_config=model_config)
            loaded_agent.load(tempdir, [])
        finally:
            # remove the temp directory even if save or load fails
            bcore._rmpath(tempdir)
コード例 #4
0
 def test_save_load(self):
     """Trains a ppo agent, saves it, loads it into a new agent instance and plays one short episode."""
     model_config = core.ModelConfig(_lineworld_name)
     tc = core.PpoTrainContext()
     ppo_agent = tfagents.TfPpoAgent(model_config=model_config)
     ppo_agent.train(
         train_context=tc,
         callbacks=[duration._SingleIteration(),
                    log.Iteration()])
     tempdir = bcore._get_temp_path()
     bcore._mkdir(tempdir)
     try:
         ppo_agent.save(tempdir, [])
         # replace the trained agent by a new instance, so play runs on the loaded state
         ppo_agent = tfagents.TfPpoAgent(model_config=model_config)
         ppo_agent.load(tempdir, [])
         pc = core.PlayContext()
         pc.max_steps_per_episode = 10
         pc.num_episodes = 1
         ppo_agent.play(play_context=pc, callbacks=[])
     finally:
         # remove the temp directory even if save, load or play fails
         bcore._rmpath(tempdir)
コード例 #5
0
    def __init__(self, directory: str = None):
        """Sets up the directory the best policies (along with the agent definition) are saved to.

        Args:
            directory: the directory to save to; if None (or empty) a path obtained from
                bcore._get_temp_path() is used instead.
        """
        target = directory or bcore._get_temp_path()
        self.directory: str = bcore._mkdir(target)
        # nothing has been saved yet
        self.saved_agents: List[Tuple[int, float, str]] = []