Example #1
0
 def _update_obs_estimate(self, obs):
     """Fold one observation into the running mean and variance estimates.

     Both ``self._obs_mean`` and ``self._obs_var`` are updated as
     exponential moving averages weighted by ``self._obs_alpha``.

     Args:
         obs: Observation to flatten against ``self.env.observation_space``
             before it is blended into the estimates.
     """
     sample = flatten(self.env.observation_space, obs)
     alpha = self._obs_alpha
     keep = 1 - alpha

     updated_mean = keep * self._obs_mean + alpha * sample
     self._obs_mean = updated_mean

     # The squared deviation is taken against the mean that already
     # includes this sample, not the previous mean.
     squared_dev = np.square(sample - updated_mean)
     self._obs_var = keep * self._obs_var + alpha * squared_dev
Example #2
0
 def _apply_normalize_obs(self, obs):
     """Update the running statistics with ``obs`` and return it normalized.

     The observation is flattened, centred on ``self._obs_mean`` and divided
     by the square root of ``self._obs_var`` plus ``1e-8``.

     Args:
         obs: Observation to normalize.

     Returns:
         The normalized flat observation when ``self._flatten_obs`` is
         truthy; otherwise the result of passing it through ``unflatten``
         with ``self.env.observation_space``.
     """
     # Statistics are refreshed first, so this observation contributes to
     # the mean and variance used to normalize it.
     self._update_obs_estimate(obs)

     space = self.env.observation_space
     centered = flatten(space, obs) - self._obs_mean
     # The small constant keeps the denominator non-zero when the
     # variance estimate is zero.
     scale = np.sqrt(self._obs_var) + 1e-8
     result = centered / scale

     if self._flatten_obs:
         return result
     return unflatten(space, result)
Example #3
0
 def flatten(self, x):
     """Flatten each entry of ``x`` with its paired space and stack them.

     Args:
         x: Sized collection with exactly ``self.agent_num`` entries. Entry
             ``i`` is flattened with the ``i``-th item of ``self.spaces``.

     Returns:
         ``np.array`` built from the list of ``utils.flatten`` results, in
         the same order as ``x``.

     Raises:
         AssertionError: If ``len(x)`` differs from ``self.agent_num``.
     """
     assert len(x) == self.agent_num
     flat_parts = []
     for entry, entry_space in zip(x, self.spaces):
         flat_parts.append(utils.flatten(entry_space, entry))
     return np.array(flat_parts)