Beispiel #1
0
    def __init__(
        self,
        env,
        scale_reward=1.0,
        normalize_obs=False,
        normalize_reward=False,
        flatten_obs=True,
        obs_alpha=0.001,
        reward_alpha=0.001,
    ):
        """Wrap ``env`` and initialise the normalisation bookkeeping.

        Every argument except ``env`` is stored on the instance under a
        leading-underscore name; how the stored values are consumed is
        decided by other methods of the class, which are not visible here.

        Args:
            env: Environment handed to the parent constructor. Its
                ``observation_space`` determines the length of the
                observation statistics vectors.
            scale_reward: Stored as ``_scale_reward``.
            normalize_obs: Stored as ``_normalize_obs``.
            normalize_reward: Stored as ``_normalize_reward``.
            flatten_obs: Stored as ``_flatten_obs``.
            obs_alpha: Stored as ``_obs_alpha`` (presumably the update
                rate of the running observation statistics -- confirm
                against the update code).
            reward_alpha: Stored as ``_reward_alpha`` (presumably the
                update rate of the running reward statistics -- confirm).
        """
        # Keep this as the very first statement: at this point locals()
        # holds nothing but ``self`` and the constructor arguments.
        self._Serializable__initialize(locals())
        super(NormalizedEnv, self).__init__(env)

        # Plain configuration values, kept exactly as passed in.
        self._scale_reward = scale_reward
        self._normalize_obs, self._normalize_reward = (
            normalize_obs,
            normalize_reward,
        )
        self._flatten_obs = flatten_obs

        # Observation statistics: one entry per flattened observation
        # dimension, starting from zero mean and unit variance.
        self._obs_alpha = obs_alpha
        obs_size = flat_dim(env.observation_space)
        self._obs_mean = np.zeros(obs_size)
        self._obs_var = np.ones(obs_size)

        # Reward statistics: scalars, same zero-mean / unit-variance start.
        self._reward_alpha = reward_alpha
        self._reward_mean, self._reward_var = 0.0, 1.0
Beispiel #2
0
 def __init__(
         self,
         action_space,
         mu=0,
         sigma=0.5,
         theta=0.3,
         dt=1e-2,
         x0=None,
 ):
     """Store the configuration and put the object into its initial state.

     All arguments are saved on the instance under their own names, the
     flattened size of ``action_space`` is cached as ``action_dim``, and
     ``reset()`` is called once everything is in place.

     NOTE(review): the parameter names (``mu``, ``sigma``, ``theta``,
     ``dt``, ``x0``) suggest an Ornstein-Uhlenbeck style noise process;
     confirm against the methods that actually use them.

     Args:
         action_space: Space whose flattened dimension is cached.
         mu: Stored as ``self.mu``.
         sigma: Stored as ``self.sigma``.
         theta: Stored as ``self.theta``.
         dt: Stored as ``self.dt``.
         x0: Stored as ``self.x0``; may be ``None``.
     """
     # Action space first, then the size derived from it.
     self.action_space = action_space
     self.action_dim = flat_dim(self.action_space)

     # Remaining parameters are kept verbatim.
     self.mu, self.sigma, self.theta = mu, sigma, theta
     self.dt, self.x0 = dt, x0

     # reset() runs last so that every attribute above already exists.
     self.reset()
Beispiel #3
0
 def agent_flat_dim(self, i):
     """Return the flattened dimension of the space at index ``i``.

     ``i`` must be a member of ``range(self.agent_num)``; this is checked
     with an ``assert``, so the check disappears under ``python -O``.
     """
     assert i in range(self.agent_num)
     agent_space = self.spaces[i]
     return utils.flat_dim(agent_space)
Beispiel #4
0
 def opponent_flat_dim(self, i):
     """Return the total flattened dimension minus that of space ``i``.

     ``i`` must be a member of ``range(self.agent_num)``; this is checked
     with an ``assert``, so the check disappears under ``python -O``.
     """
     assert i in range(self.agent_num)
     # NOTE(review): ``self.flat_dim`` is read as a value, not called, so it
     # is presumably exposed as a property -- confirm on the class.
     total_dim = self.flat_dim
     own_dim = utils.flat_dim(self.spaces[i])
     return total_dim - own_dim
Beispiel #5
0
 def flat_dim(self):
     """
     Sum of the flattened dimensions of every space in ``self.spaces``.

     The per-space sizes come from ``utils.flat_dim`` and are added up
     with ``np.sum``.
     """
     per_space_dims = [utils.flat_dim(space) for space in self.spaces]
     return np.sum(per_space_dims)