def __init__(
    self,
    env,
    scale_reward=1.0,
    normalize_obs=False,
    normalize_reward=False,
    flatten_obs=True,
    obs_alpha=0.001,
    reward_alpha=0.001,
):
    """Wrap ``env`` with optional observation/reward normalization state.

    Args:
        env: Environment instance to wrap.
        scale_reward: Multiplier for rewards (presumably applied in the
            step method — not visible here, confirm against it).
        normalize_obs: Whether observation normalization is enabled.
        normalize_reward: Whether reward normalization is enabled.
        flatten_obs: Whether observations are treated as flattened.
        obs_alpha: Smoothing coefficient for the running observation stats.
        reward_alpha: Smoothing coefficient for the running reward stats.
    """
    # Record constructor arguments for Serializable-style pickling support.
    self._Serializable__initialize(locals())
    super(NormalizedEnv, self).__init__(env)

    self._scale_reward = scale_reward
    self._normalize_obs = normalize_obs
    self._normalize_reward = normalize_reward
    self._flatten_obs = flatten_obs

    # Running per-dimension statistics over the flattened observation,
    # initialized to mean 0 / variance 1.
    self._obs_alpha = obs_alpha
    obs_dim = flat_dim(env.observation_space)
    self._obs_mean = np.zeros(obs_dim)
    self._obs_var = np.ones(obs_dim)

    # Running scalar statistics for the reward stream.
    self._reward_alpha = reward_alpha
    self._reward_mean = 0.0
    self._reward_var = 1.0
def __init__(self, action_space, mu=0, sigma=0.5, theta=0.3, dt=1e-2, x0=None):
    """Initialize exploration noise over ``action_space``.

    The parameter names (mu, sigma, theta, dt) suggest an
    Ornstein-Uhlenbeck process — the dynamics live in other methods,
    so confirm there.

    Args:
        action_space: Space whose flat dimension sets the noise shape.
        mu: Long-run mean level of the process.
        sigma: Noise scale.
        theta: Mean-reversion rate.
        dt: Time-step size.
        x0: Optional initial state; semantics defined by reset().
    """
    self.action_space = action_space
    self.action_dim = flat_dim(self.action_space)

    # Process parameters, stored verbatim.
    self.mu = mu
    self.sigma = sigma
    self.theta = theta
    self.dt = dt
    self.x0 = x0

    # Establish the initial internal state.
    self.reset()
def agent_flat_dim(self, i):
    """Return the flat dimension of agent ``i``'s own sub-space.

    NOTE(review): the ``assert`` is stripped under ``python -O``;
    callers are expected to pass a valid agent index.
    """
    assert i in range(self.agent_num)
    own_space = self.spaces[i]
    return utils.flat_dim(own_space)
def opponent_flat_dim(self, i):
    """Return the combined flat dimension of every space except agent ``i``'s.

    Computed as the total flat dimension minus agent ``i``'s own
    contribution.  NOTE(review): the ``assert`` is stripped under
    ``python -O``; callers are expected to pass a valid agent index.
    """
    assert i in range(self.agent_num)
    own_dim = utils.flat_dim(self.spaces[i])
    return self.flat_dim - own_dim
def flat_dim(self):
    """The dimension of the flattened vector of the tensor representation.

    Sums the flat dimension of every constituent space.
    """
    per_space_dims = [utils.flat_dim(space) for space in self.spaces]
    # np.sum keeps the historical numpy-scalar return type.
    return np.sum(per_space_dims)