import logging
import os
import pickle
import tempfile
from typing import Optional

from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import VecNormalize

# Note: `StopOnReward` is assumed to be defined elsewhere in the project.
# It mirrors stable-baselines3's `StopTrainingOnRewardThreshold` callback.


def train(train_agent: BaseAlgorithm,
          max_timesteps: int,
          verbose: bool = True) -> str:
    """Train a model on a specific environment using a given agent.

    Note that the agent is associated with a given reinforcement learning
    algorithm, and instantiated for a specific environment and neural
    network model. Thus, it already wraps all the information required to
    actually perform training.

    .. note::
        This function can be terminated early using CTRL+C.

    :param train_agent: Training agent.
    :param max_timesteps: Maximum number of training timesteps.
    :param verbose: Whether to print information about what is going on.
                    Optional: True by default.

    :returns: Full path of the agent's final state dump, which also
              contains the trained neural network model.
    """
    # Get the testing environment spec
    spec = train_agent.eval_env.envs[0].spec

    # Create a callback to stop learning early if the environment defines a
    # reward threshold and it is exceeded
    if spec.reward_threshold is not None:
        callback_reward = StopOnReward(reward_threshold=spec.reward_threshold)
        eval_callback = EvalCallback(train_agent.eval_env,
                                     callback_on_new_best=callback_reward,
                                     eval_freq=5000,
                                     n_eval_episodes=100)
    else:
        eval_callback = None

    try:
        # Run the learning process
        train_agent.learn(total_timesteps=max_timesteps,
                          log_interval=5,
                          reset_num_timesteps=False,
                          callback=eval_callback)
        # Stopping before the timestep budget was exhausted means the
        # evaluation callback reached the reward threshold
        if train_agent.num_timesteps < max_timesteps:
            print("Problem solved successfully!")
    except KeyboardInterrupt:
        if verbose:
            print("Interrupting training...")

    # Dump the agent's final state to a uniquely-named checkpoint file
    fd, checkpoint_path = tempfile.mkstemp(dir=train_agent.tensorboard_log,
                                           prefix=spec.id,
                                           suffix='.zip')
    os.close(fd)
    train_agent.save(checkpoint_path)

    return checkpoint_path
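
# Minimal usage sketch (an illustration, not from the original source). It
# assumes stable-baselines3 with a Gym environment whose spec defines a
# reward threshold, e.g. CartPole-v1, and that `StopOnReward` (see the note
# above) is available. `train` reads `train_agent.eval_env`, which is not a
# standard `BaseAlgorithm` attribute, so it is attached by hand here.
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

agent = PPO("MlpPolicy",
            DummyVecEnv([lambda: gym.make("CartPole-v1")]),
            tensorboard_log="/tmp/ppo_cartpole")
agent.eval_env = DummyVecEnv([lambda: gym.make("CartPole-v1")])

checkpoint_path = train(agent, max_timesteps=100_000)
print(f"Checkpoint saved at {checkpoint_path}")
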

def save_trained_model(self, model: BaseAlgorithm) -> None:
    """Save trained model, optionally along with its replay buffer and
    ``VecNormalize`` statistics.

    :param model: Trained model to serialize.
    """
    print(f"Saving to {self.save_path}")
    model.save(f"{self.save_path}/{self.env_id}")

    if hasattr(model, "save_replay_buffer") and self.save_replay_buffer:
        print("Saving replay buffer")
        model.save_replay_buffer(
            os.path.join(self.save_path, "replay_buffer.pkl"))

    if self.normalize:
        # Important: save the running averages, since the same normalization
        # statistics are needed when testing the agent
        model.get_vec_normalize_env().save(
            os.path.join(self.params_path, "vecnormalize.pkl"))
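
# Hedged usage sketch: `save_trained_model` is written as a method and reads
# several attributes from `self`. The holder class below is hypothetical and
# only makes those assumed attributes explicit; the real class comes from the
# surrounding experiment-manager code.
class _SaverSketch:
    save_trained_model = save_trained_model  # bind the function above as a method

    def __init__(self):
        self.save_path = "logs/ppo/CartPole-v1_1"  # model + replay buffer directory
        self.params_path = self.save_path          # VecNormalize statistics directory
        self.env_id = "CartPole-v1"                # filename stem passed to model.save
        self.save_replay_buffer = False            # only meaningful for off-policy algos
        self.normalize = False                     # True if the env is wrapped in VecNormalize

_SaverSketch().save_trained_model(agent)  # `agent` as trained in the sketch above
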

def save_stable_model(
    output_dir: str,
    model: BaseAlgorithm,
    vec_normalize: Optional[VecNormalize] = None,
) -> None:
    """Serialize policy.

    Load later with `load_policy(..., policy_path=output_dir)`.

    Args:
        output_dir: Path to the save directory.
        model: The stable baselines model.
        vec_normalize: Optionally, a VecNormalize to save statistics for.
            `load_policy` automatically applies `NormalizePolicy` wrapper
            when loading.
    """
    os.makedirs(output_dir, exist_ok=True)
    model.save(os.path.join(output_dir, "model.pkl"))
    if vec_normalize is not None:
        with open(os.path.join(output_dir, "vec_normalize.pkl"), "wb") as f:
            pickle.dump(vec_normalize, f)
    logging.info("Saved policy to %s", output_dir)
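
# Usage sketch: persist the model (and, if given, its VecNormalize
# statistics), then reload it by hand. The `load_policy` helper mentioned in
# the docstring belongs to the surrounding codebase; the manual reload below
# uses only plain stable-baselines3 calls. `agent` and `PPO` come from the
# earlier sketches.
save_stable_model("output/cartpole", agent)

reloaded = PPO.load(os.path.join("output/cartpole", "model.pkl"))
assert reloaded.observation_space == agent.observation_space  # round-trip check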