def save(self, directory: Optional[str] = None,
         callbacks: Union[List[core.AgentCallback], core.AgentCallback, None] = None) -> str:
    """Saves the currently trained actor policy in directory.

    If save is called before a trained policy was created (e.g. by calling train()),
    an exception is raised.

    Args:
        directory: the directory to save the policy weights to. If the directory does not
            exist yet, a new directory is created. If None the policy is saved in a temp directory.
        callbacks: list of callbacks called during save (e.g. log.Agent)

    Returns:
        The absolute path to the directory containing the saved policy.
    """
    if directory is None:
        directory = bcore._get_temp_path()
    assert directory
    assert self._backend_agent._agent_context._is_policy_trained, \
        "No trained policy available. Call train() first."
    directory = bcore._mkdir(directory)

    # Persist the agent definition as json next to the policy weights.
    agent_json_path = os.path.join(directory, EasyAgent._KEY_EASYAGENT_FILENAME)
    with open(agent_json_path, 'w') as jsonfile:
        agent_dict = self._to_dict()
        json.dump(agent_dict, jsonfile, sort_keys=True, indent=2)

    # Delegate saving the policy weights themselves to the backend agent.
    callbacks = self._to_callback_list(callbacks=callbacks)
    policy_directory = os.path.join(directory, EasyAgent._KEY_POLICY_DIRECTORY)
    policy_directory = bcore._mkdir(policy_directory)
    self._backend_agent.save(directory=policy_directory, callbacks=callbacks)
    return directory
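A minimal usage sketch of this save path from the caller's side. PpoAgent, the 'CartPole-v0' environment name, and the duration.Fast() callback are assumptions about the public easyagents API and are not taken from the code above; only save() and the log callbacks appear in these snippets.

# Hedged usage sketch (assumed public API, see note above): train briefly, then
# persist the agent definition plus the policy weights via save().
from easyagents.agents import PpoAgent
from easyagents.callbacks import duration, log

agent = PpoAgent('CartPole-v0')
agent.train([duration.Fast(), log.Iteration()])
saved_dir = agent.save(callbacks=[log.Agent()])  # returns the absolute save directory
print(saved_dir)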
def test_save_load(self):
    model_config = core.ModelConfig(_lineworld_name)
    random_agent = tfagents.TfRandomAgent(model_config=model_config)
    tempdir = bcore._get_temp_path()
    bcore._mkdir(tempdir)
    random_agent.save(directory=tempdir, callbacks=[])
    random_agent.load(directory=tempdir, callbacks=[])
    bcore._rmpath(tempdir)
def test_save_load(self):
    from easyagents.backends import tforce

    model_config = core.ModelConfig(_cartpole_name)
    tc = core.PpoTrainContext()
    tc.num_iterations = 3
    ppo_agent = tforce.TforcePpoAgent(model_config=model_config)
    ppo_agent.train(train_context=tc, callbacks=[log.Iteration(), log.Agent()])
    tempdir = bcore._get_temp_path()
    bcore._mkdir(tempdir)
    ppo_agent.save(tempdir, [])
    loaded_agent = tforce.TforcePpoAgent(model_config=model_config)
    loaded_agent.load(tempdir, [])
    bcore._rmpath(tempdir)
def test_save_load(self):
    model_config = core.ModelConfig(_lineworld_name)
    tc = core.PpoTrainContext()
    ppo_agent = tfagents.TfPpoAgent(model_config=model_config)
    ppo_agent.train(train_context=tc, callbacks=[duration._SingleIteration(), log.Iteration()])
    tempdir = bcore._get_temp_path()
    bcore._mkdir(tempdir)
    ppo_agent.save(tempdir, [])
    ppo_agent = tfagents.TfPpoAgent(model_config=model_config)
    ppo_agent.load(tempdir, [])
    pc = core.PlayContext()
    pc.max_steps_per_episode = 10
    pc.num_episodes = 1
    ppo_agent.play(play_context=pc, callbacks=[])
    bcore._rmpath(tempdir)
def __init__(self, directory: str = None):
    """Saves the best policies (along with the agent definition) in directory.

    If directory is None the policies are written to a temp directory.

    Args:
        directory: the directory to save to; if None, a temp directory is created.
    """
    directory = directory if directory else bcore._get_temp_path()
    self.directory: str = bcore._mkdir(directory)
    self.saved_agents: List[Tuple[int, float, str]] = []
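For illustration only, a self-contained sketch of how the bookkeeping fields initialized above might be used. The class name BestPolicySaver, the record() helper, and the (iteration, reward, directory) tuple layout are hypothetical; only the constructor mirrors the code above.

# Hypothetical, self-contained sketch (class name, record() and tuple layout are assumptions):
import os
import tempfile
from typing import List, Optional, Tuple


class BestPolicySaver:
    """Illustrative stand-in mirroring the constructor above."""

    def __init__(self, directory: Optional[str] = None):
        # Fall back to a fresh temp directory when no target directory is given.
        directory = directory if directory else tempfile.mkdtemp()
        os.makedirs(directory, exist_ok=True)
        self.directory: str = directory
        # One entry per saved policy: (iteration, mean reward, save directory) -- assumed layout.
        self.saved_agents: List[Tuple[int, float, str]] = []

    def record(self, iteration: int, reward: float, saved_dir: str) -> None:
        # Remember where a policy was saved and how well it scored.
        self.saved_agents.append((iteration, reward, saved_dir))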