def _update_obs_estimate(self, obs):
    """Fold one observation into the running mean/variance estimates.

    The observation is flattened against ``self.env.observation_space``
    and blended into ``self._obs_mean`` and ``self._obs_var`` with an
    exponential moving average weighted by ``self._obs_alpha``.

    Args:
        obs: Observation to incorporate; it must be accepted by the
            ``flatten`` helper for the environment's observation space.
    """
    alpha = self._obs_alpha
    flat = flatten(self.env.observation_space, obs)

    # Mean first: the variance update below deliberately measures the
    # deviation from the *already updated* mean.
    self._obs_mean = (1 - alpha) * self._obs_mean + alpha * flat

    squared_dev = np.square(flat - self._obs_mean)
    self._obs_var = (1 - alpha) * self._obs_var + alpha * squared_dev
def _apply_normalize_obs(self, obs):
    """Update the running statistics with ``obs`` and return it normalized.

    The running estimates are refreshed first, so the observation is
    normalized against statistics that already include it.

    Args:
        obs: Observation to normalize.

    Returns:
        The flattened observation minus ``self._obs_mean``, divided by
        ``sqrt(self._obs_var) + 1e-8``. When ``self._flatten_obs`` is
        falsy the result is passed through ``unflatten`` for the
        environment's observation space before being returned.
    """
    self._update_obs_estimate(obs)

    centered = flatten(self.env.observation_space, obs) - self._obs_mean
    # The small constant keeps the division finite when the variance is 0.
    scale = np.sqrt(self._obs_var) + 1e-8
    result = centered / scale

    if self._flatten_obs:
        return result
    return unflatten(self.env.observation_space, result)
def flatten(self, x):
    """Flatten one entry per agent and stack the results into an array.

    Args:
        x: Sequence whose length must equal ``self.agent_num``; entries
            are paired positionally with ``self.spaces``.

    Returns:
        ``np.array`` built from ``utils.flatten(space, x_i)`` for each
        paired entry and space.

    Raises:
        AssertionError: If ``len(x)`` differs from ``self.agent_num``.
    """
    assert len(x) == self.agent_num

    per_agent = []
    for x_i, space in zip(x, self.spaces):
        per_agent.append(utils.flatten(space, x_i))
    return np.array(per_agent)