Example #1
    def __init__(self, venv, training=True, norm_obs=True, norm_reward=True,
                 clip_obs=10., clip_reward=10., gamma=0.99, epsilon=1e-8, context_bounds=None):
        VecEnvWrapper.__init__(self, venv)

        self.context_bounds = context_bounds
        self.dim_off = None
        if self.context_bounds is not None:
            assert len(self.observation_space.shape) == 1
            self.dim_off = self.observation_space.shape[0] - self.context_bounds[0].shape[0]

        # If the offset dimension is not None, i.e. if we normalize the context observations differently, initialize
        # the RunningMeanStd with a different dimensionality - it is assumed that the context follows the original state
        if self.dim_off is None:
            self.obs_rms = RunningMeanStd(shape=self.observation_space.shape)
        else:
            if len(self.observation_space.shape) != 1:
                raise RuntimeError("Only simple continuous observation spaces support an offset index")
            self.obs_rms = RunningMeanStd(shape=(self.dim_off,))

        self.ret_rms = RunningMeanStd(shape=())
        self.clip_obs = clip_obs
        self.clip_reward = clip_reward
        # Returns: discounted rewards
        self.ret = np.zeros(self.num_envs)
        self.gamma = gamma
        self.epsilon = epsilon
        self.training = training
        self.norm_obs = norm_obs
        self.norm_reward = norm_reward
        self.old_obs = None
        self.old_rews = None
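A minimal usage sketch for this context-aware variant of VecNormalize. The wrapper class name ContextualVecNormalize and the env factory make_context_env are hypothetical; the sketch only assumes, following the code above, that the last len(context_bounds[0]) entries of each observation are context values which are presumably normalized with the fixed bounds instead of running statistics.

import numpy as np
from stable_baselines.common.vec_env import DummyVecEnv

# Hypothetical names: `make_context_env` builds an env whose observation ends
# with 2 context values, `ContextualVecNormalize` is the wrapper defined above.
venv = DummyVecEnv([make_context_env])

# Per-dimension lower/upper bounds for the 2 trailing context dimensions.
context_bounds = (np.array([0.5, -1.0]), np.array([2.0, 1.0]))

venv = ContextualVecNormalize(venv, norm_obs=True, norm_reward=True,
                              context_bounds=context_bounds)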
Example #2
    def __init__(self, venv, n_stack):
        self.venv = venv
        self.n_stack = n_stack
        wrapped_obs_space = venv.observation_space
        # Repeat the per-element bounds along the last axis so the Box can hold
        # n_stack stacked frames (the channel dimension for image observations).
        low = np.repeat(wrapped_obs_space.low, self.n_stack, axis=-1)
        high = np.repeat(wrapped_obs_space.high, self.n_stack, axis=-1)
        self.stackedobs = np.zeros((venv.num_envs,) + low.shape, low.dtype)
        observation_space = spaces.Box(low=low,
                                       high=high,
                                       dtype=venv.observation_space.dtype)
        VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
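This matches the constructor of stable-baselines' VecFrameStack. A short usage sketch, assuming gym and the Atari extras are installed:

import gym
from stable_baselines.common.vec_env import DummyVecEnv, VecFrameStack

# Stack the last 4 frames along the channel axis.
venv = DummyVecEnv([lambda: gym.make("BreakoutNoFrameskip-v4")])
venv = VecFrameStack(venv, n_stack=4)

obs = venv.reset()
print(obs.shape)  # (1, 210, 160, 12) for raw Breakout frames: 3 channels * 4 frames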
Example #3
    def __init__(self,
                 venv,
                 raise_exception=False,
                 warn_once=True,
                 check_inf=True):
        VecEnvWrapper.__init__(self, venv)
        self.raise_exception = raise_exception
        self.warn_once = warn_once
        self.check_inf = check_inf
        self._actions = None
        self._observations = None
        self._user_warned = False
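The signature above matches stable-baselines' VecCheckNan, which validates the actions and observations passing through the vectorized env. A minimal sketch of attaching it in strict mode:

import gym
from stable_baselines.common.vec_env import DummyVecEnv, VecCheckNan

venv = DummyVecEnv([lambda: gym.make("Pendulum-v0")])
# Raise an error (instead of only warning once) as soon as a NaN or inf
# appears in an action or observation.
venv = VecCheckNan(venv, raise_exception=True)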
Example #4
    def set_venv(self, venv):
        """
        Sets the vector environment to wrap to venv.

        Also sets attributes derived from this such as `num_env`.

        :param venv: (VecEnv)
        """
        if self.venv is not None:
            raise ValueError("Trying to set venv of already initialized VecNormalize wrapper.")
        VecEnvWrapper.__init__(self, venv)
        if self.obs_rms.mean.shape != self.observation_space.shape:
            raise ValueError("venv is incompatible with current statistics.")
        self.ret = np.zeros(self.num_envs)
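set_venv re-attaches a vectorized environment to a VecNormalize instance whose venv was stripped before serialization (which is what the ValueError above guards against). A minimal sketch, assuming the normalization statistics were saved with pickle:

import pickle
import gym
from stable_baselines.common.vec_env import DummyVecEnv

# Load a pickled VecNormalize whose `venv` attribute was dropped on save,
# then attach a fresh vectorized environment so it can be used again.
# The path "vec_normalize.pkl" is illustrative.
with open("vec_normalize.pkl", "rb") as file_handler:
    vec_normalize = pickle.load(file_handler)

venv = DummyVecEnv([lambda: gym.make("Pendulum-v0")])
vec_normalize.set_venv(venv)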
Example #5
    def __init__(self, venv, training=True, norm_obs=True, norm_reward=True,
                 clip_obs=10., clip_reward=10., gamma=0.99, epsilon=1e-8):
        VecEnvWrapper.__init__(self, venv)
        self.obs_rms = RunningMeanStd(shape=self.observation_space.shape)
        self.ret_rms = RunningMeanStd(shape=())
        self.clip_obs = clip_obs
        self.clip_reward = clip_reward
        # Returns: discounted rewards
        self.ret = np.zeros(self.num_envs)
        self.gamma = gamma
        self.epsilon = epsilon
        self.training = training
        self.norm_obs = norm_obs
        self.norm_reward = norm_reward
        self.old_obs = np.array([])
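This is the standard VecNormalize constructor. A typical call keeps running statistics of observations and discounted returns during training and clips both after normalization:

import gym
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

venv = DummyVecEnv([lambda: gym.make("Pendulum-v0")])
# Normalize observations and rewards with running mean/std,
# clipping the normalized values to [-10, 10].
venv = VecNormalize(venv, training=True, norm_obs=True, norm_reward=True,
                    clip_obs=10., clip_reward=10., gamma=0.99)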
Example #6
    def __init__(self,
                 venv,
                 video_folder,
                 record_video_trigger,
                 video_length=200,
                 name_prefix='rl-video'):

        VecEnvWrapper.__init__(self, venv)

        self.env = venv
        # Temp variable to retrieve metadata
        temp_env = venv

        # Unwrap to retrieve metadata dict
        # that will be used by gym recorder
        while isinstance(temp_env, VecNormalize) or isinstance(
                temp_env, VecFrameStack):
            temp_env = temp_env.venv

        if isinstance(temp_env, DummyVecEnv) or isinstance(
                temp_env, SubprocVecEnv):
            metadata = temp_env.get_attr('metadata')[0]
        else:
            metadata = temp_env.metadata

        self.env.metadata = metadata

        self.record_video_trigger = record_video_trigger
        self.video_recorder = None

        self.video_folder = os.path.abspath(video_folder)
        # Create output folder if needed
        os.makedirs(self.video_folder, exist_ok=True)

        self.name_prefix = name_prefix
        self.step_id = 0
        self.video_length = video_length

        self.recording = False
        self.recorded_frames = 0
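This matches stable-baselines' VecVideoRecorder. The record_video_trigger callable receives the current step count and decides when a new clip starts; a short sketch recording 200-step clips every 1000 steps:

import gym
from stable_baselines.common.vec_env import DummyVecEnv, VecVideoRecorder

venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])
# Start a 200-step recording whenever the step counter hits a multiple
# of 1000; videos are written to ./videos.
venv = VecVideoRecorder(venv, "./videos",
                        record_video_trigger=lambda step: step % 1000 == 0,
                        video_length=200,
                        name_prefix="rl-video")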
Example #7
    def close(self):
        VecEnvWrapper.close(self)
        self.close_video_recorder()
Example #8
    def __init__(self, venv,):
        VecEnvWrapper.__init__(self, venv)