def __init__(self, policy, env, verbose=0, *, requires_vec_env, policy_base):
    """
    Initialize the base RL model.

    :param policy: policy class, or name of a registered policy (str) to be
        resolved against ``policy_base`` via ``get_policy_from_name``
    :param env: the environment to learn from; a Gym env, a ``VecEnv``, a
        Gym env id (str) to be instantiated and wrapped in a ``DummyVecEnv``,
        or ``None`` (spaces and env-derived attributes stay ``None``)
    :param verbose: verbosity level (>= 1 prints env-creation info)
    :param requires_vec_env: whether this model only accepts vectorized envs
    :param policy_base: base policy class used to resolve string policy names

    :raises ValueError: if ``requires_vec_env`` is set but ``env`` is not a
        ``VecEnv``, or if a multi-env ``VecEnv`` is given to a model that
        requires a non-vectorized (or single) environment.
    """
    if isinstance(policy, str):
        self.policy = get_policy_from_name(policy_base, policy)
    else:
        self.policy = policy
    self.env = env
    self.verbose = verbose
    self._requires_vec_env = requires_vec_env
    self.observation_space = None
    self.action_space = None
    self.n_envs = None
    self._vectorize_action = False

    if env is not None:
        if isinstance(env, str):
            if self.verbose >= 1:
                print("Creating environment from the given name, wrapped in a DummyVecEnv.")
            # Bind the env id to a dedicated local before building the closure.
            # The previous `lambda: gym.make(env)` captured `env` by late
            # binding and only worked because DummyVecEnv calls the factory
            # eagerly, before `env` is rebound to the vec env on the next line.
            env_id = env
            self.env = env = DummyVecEnv([lambda: gym.make(env_id)])

        self.observation_space = env.observation_space
        self.action_space = env.action_space

        if requires_vec_env:
            if isinstance(env, VecEnv):
                self.n_envs = env.num_envs
            else:
                raise ValueError("Error: the model requires a vectorized environment, please use a VecEnv wrapper.")
        else:
            # Non-vectorized model: a single-env VecEnv is transparently
            # unwrapped, and actions get (un)vectorized on the fly.
            if isinstance(env, VecEnv):
                if env.num_envs == 1:
                    self.env = _UnvecWrapper(env)
                    self._vectorize_action = True
                else:
                    raise ValueError("Error: the model requires a non vectorized environment or a single vectorized"
                                     " environment.")
            self.n_envs = 1
def __init__(self, policy, env, policy_base, policy_kwargs=None,
             verbose=0, device='auto', support_multi_env=False,
             create_eval_env=False, monitor_wrapper=True, seed=None):
    """
    Initialize the base RL algorithm.

    :param policy: policy class, or name of a registered policy (str) to be
        resolved against ``policy_base`` via ``get_policy_from_name``
    :param env: the environment to learn from; a Gym env, a ``VecEnv``, a
        Gym env id (str), or ``None``. Non-vectorized envs are wrapped in a
        ``DummyVecEnv``.
    :param policy_base: base policy class used to resolve string policy
        names; if ``None``, ``policy`` is taken as the class itself
    :param policy_kwargs: extra keyword arguments for policy construction
        (defaults to an empty dict)
    :param verbose: verbosity level (>= 1 prints env-creation info)
    :param device: requested device (accepted here; not consumed in this
        constructor — presumably used by subclasses, TODO confirm)
    :param support_multi_env: whether the algorithm can train on multiple
        parallel environments
    :param create_eval_env: if ``env`` is a str, also build a separate
        evaluation env from the same id
    :param monitor_wrapper: wrap newly created envs in a ``Monitor``
    :param seed: seed stored for later use (not applied here)

    :raises ValueError: if multiple envs are provided while
        ``support_multi_env`` is False.
    """
    if isinstance(policy, str) and policy_base is not None:
        self.policy_class = get_policy_from_name(policy_base, policy)
    else:
        self.policy_class = policy

    self.env = env
    # get VecNormalize object if needed
    self._vec_normalize_env = unwrap_vec_normalize(env)
    self.verbose = verbose
    self.policy_kwargs = {} if policy_kwargs is None else policy_kwargs
    self.observation_space = None
    self.action_space = None
    self.n_envs = None
    self.num_timesteps = 0
    self.eval_env = None
    self.replay_buffer = None
    self.seed = seed
    self.action_noise = None
    # Track the training progress (from 1 to 0)
    # this is used to update the learning rate
    self._current_progress = 1

    # Create and wrap the env if needed
    if env is not None:
        if isinstance(env, str):
            if create_eval_env:
                eval_env = gym.make(env)
                if monitor_wrapper:
                    eval_env = Monitor(eval_env, filename=None)
                # `eval_env` is final here, so the closure capture is safe.
                self.eval_env = DummyVecEnv([lambda: eval_env])
            if self.verbose >= 1:
                print("Creating environment from the given name, wrapped in a DummyVecEnv.")
            env = gym.make(env)
            if monitor_wrapper:
                env = Monitor(env, filename=None)
            # Bind to a dedicated name so the factory does not depend on the
            # late-bound `env`, which is rebound to the vec env just below.
            train_env = env
            env = DummyVecEnv([lambda: train_env])

        self.observation_space = env.observation_space
        self.action_space = env.action_space
        if not isinstance(env, VecEnv):
            if self.verbose >= 1:
                print("Wrapping the env in a DummyVecEnv.")
            wrapped_env = env
            env = DummyVecEnv([lambda: wrapped_env])

        self.n_envs = env.num_envs
        self.env = env

        if not support_multi_env and self.n_envs > 1:
            # Original message was garbled ("...multiple envs requires a
            # single vectorized environment."); fixed for readability.
            raise ValueError(
                "Error: the model does not support multiple envs; it requires "
                "a single vectorized environment.")