Code Example #1
import os
import tempfile

from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.callbacks import EvalCallback
# `StopOnReward` is defined elsewhere in the original project; it is assumed to
# be a callback comparable to stable_baselines3's StopTrainingOnRewardThreshold.


def train(train_agent: BaseAlgorithm,
          max_timesteps: int,
          verbose: bool = True) -> str:
    """Train a model on a specific environment using a given agent.

    Note that the agent is associated with a given reinforcement learning
    algorithm, and instantiated for a specific environment and neural network
    model. Thus, it already wraps all the required information to actually
    perform training.

    .. note::
        This function can be terminated early using CTRL+C.

    :param train_agent: Training agent.
    :param max_timesteps: Maximum number of training timesteps.
    :param verbose: Whether or not to print information about what is going on.
                    Optional: True by default.

    :returns: Full path of the agent's final state dump. Note that it also contains
              the trained neural network model.
    """
    # Get testing environment spec
    spec = train_agent.eval_env.envs[0].spec

    # Create callback to stop learning early if reward threshold is exceeded
    if spec.reward_threshold is not None:
        callback_reward = StopOnReward(reward_threshold=spec.reward_threshold)
        eval_callback = EvalCallback(train_agent.eval_env,
                                     callback_on_new_best=callback_reward,
                                     eval_freq=5000,
                                     n_eval_episodes=100)
    else:
        eval_callback = None

    try:
        # Run the learning process
        train_agent.learn(total_timesteps=max_timesteps,
                          log_interval=5,
                          reset_num_timesteps=False,
                          callback=eval_callback)
        if verbose and train_agent.num_timesteps < max_timesteps:
            print("Problem solved successfully!")
    except KeyboardInterrupt:
        if verbose:
            print("Interrupting training...")

    fd, checkpoint_path = tempfile.mkstemp(dir=train_agent.tensorboard_log,
                                           prefix=spec.id,
                                           suffix='.zip')
    os.close(fd)
    train_agent.save(checkpoint_path)

    return checkpoint_path
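
A minimal usage sketch for this helper, assuming stable_baselines3 with gymnasium. The CartPole environment, the PPO hyperparameters, and the way `eval_env` is attached to the agent are illustrative assumptions, not part of the original snippet:

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# The helper above expects the agent to carry an evaluation environment
# (`eval_env`) and a `tensorboard_log` directory used for the checkpoint dump.
agent = PPO("MlpPolicy", gym.make("CartPole-v1"), tensorboard_log="/tmp/tb")
agent.eval_env = DummyVecEnv([lambda: gym.make("CartPole-v1")])

checkpoint_path = train(agent, max_timesteps=100_000)
print(f"Agent saved at: {checkpoint_path}")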
Code Example #2
    def save_trained_model(self, model: BaseAlgorithm) -> None:
        """
        Save the trained model, optionally together with its replay buffer
        and ``VecNormalize`` statistics.

        :param model: Trained model to save.
        """
        print(f"Saving to {self.save_path}")
        model.save(os.path.join(self.save_path, self.env_id))

        if hasattr(model, "save_replay_buffer") and self.save_replay_buffer:
            print("Saving replay buffer")
            model.save_replay_buffer(os.path.join(self.save_path, "replay_buffer.pkl"))

        if self.normalize:
            # Important: save the running average; evaluating the agent
            # later requires the same normalization statistics.
            model.get_vec_normalize_env().save(os.path.join(self.params_path, "vecnormalize.pkl"))
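
This method belongs to a larger helper class that is not shown here. Below is a minimal sketch of the state it relies on; the class name and constructor are hypothetical, and only the attribute names are taken from the method body:

class TrainingRunSaver:
    """Hypothetical container exposing the state save_trained_model expects."""

    def __init__(self, save_path: str, params_path: str, env_id: str,
                 save_replay_buffer: bool = False, normalize: bool = False):
        self.save_path = save_path                    # directory for the model archive
        self.params_path = params_path                # directory for VecNormalize statistics
        self.env_id = env_id                          # used as the model file name
        self.save_replay_buffer = save_replay_buffer  # whether to dump the replay buffer
        self.normalize = normalize                    # whether VecNormalize was used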
Code Example #3
File: serialize.py  Project: whoiszyc/imitation-1
import logging
import os
import pickle
from typing import Optional

from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.vec_env import VecNormalize


def save_stable_model(
    output_dir: str,
    model: BaseAlgorithm,
    vec_normalize: Optional[VecNormalize] = None,
) -> None:
    """Serialize policy.

    Load later with `load_policy(..., policy_path=output_dir)`.

    Args:
        output_dir: Path to the save directory.
        model: The stable baselines model to save.
        vec_normalize: Optionally, a VecNormalize to save statistics for.
            `load_policy` automatically applies `NormalizePolicy` wrapper
            when loading.
    """
    os.makedirs(output_dir, exist_ok=True)
    model.save(os.path.join(output_dir, "model.pkl"))
    if vec_normalize is not None:
        with open(os.path.join(output_dir, "vec_normalize.pkl"), "wb") as f:
            pickle.dump(vec_normalize, f)
    logging.info("Saved policy to %s", output_dir)
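
A usage sketch, assuming stable_baselines3 with gymnasium; the environment, the PPO model, and the output directory are illustrative assumptions:

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

# Train briefly on a normalized environment, then serialize both the model
# and the VecNormalize statistics next to it.
venv = VecNormalize(DummyVecEnv([lambda: gym.make("CartPole-v1")]))
model = PPO("MlpPolicy", venv)
model.learn(total_timesteps=10_000)

save_stable_model("output/ppo_cartpole", model, vec_normalize=venv)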