Beispiel #1
0
    def store_effect(self, idx, step_data):
        """Store effects of the action taken after observing the obs at idx.

        The reason `store_observation` and `store_effect` are broken
        up into two functions is so that one can call
        `encode_recent_observation` in between.

        Parameters
        ----------
        idx: int
            Index in buffer of recent observation
            (returned by `store_observation`).
        step_data: dict
            The data to store in the buffer. Its nested structure must
            match the structure of `self.data`.

        Raises
        ------
        ValueError
            If `step_data` does not have the same structure as `self.data`.
        """
        # An empty dict means no storage has been set up yet; the first
        # step's data is handed to `_init_replay_data` to do so.
        # NOTE(review): the explicit `== {}` comparison is kept instead of a
        # truthiness test, because `self.data` may not be a plain dict once
        # initialized (e.g. a bare array) -- confirm against
        # `_init_replay_data`.
        if self.data == {}:
            self._init_replay_data(step_data)
        if not nest.has_same_structure(self.data, step_data):
            raise ValueError("The data passed to ReplayBuffer must be the same"
                             " at all time steps.")

        def _insert(item):
            # Each item pairs one storage entry of self.data with the
            # matching new value from step_data.
            buffer, x = item
            buffer[idx] = x

        # map_structure is used purely for its side effect here; the
        # structure it returns is discarded.
        nest.map_structure(_insert, nest.zip_structure(self.data, step_data))
Beispiel #2
0
    def __init__(self,
                 venv,
                 norm=True,
                 steps=10000,
                 mean=None,
                 std=None,
                 eps=1e-2,
                 log=True,
                 log_prob=0.01):
        """Set up wrapper state and optional normalization statistics.

        Parameters
        ----------
        venv:
            Environment object forwarded to the parent class constructor.
        norm: bool
            Stored as `self.should_norm`.
        steps: int
            Stored as `self.steps`; not used further in this method.
        mean, std:
            Optional nested structures of normalization statistics. They
            are only adopted when BOTH are given; if either one is None,
            `self.mean` and `self.std` both stay None.
        eps: float
            Lower bound applied to every entry of `std`.
        log: bool
            Stored as `self.log`; not used further in this method.
        log_prob: float
            Stored as `self.log_prob`; not used further in this method.

        Raises
        ------
        ValueError
            If `mean` and `std` are both given but do not share the same
            nested structure.
        """
        super().__init__(venv)
        self.steps = steps
        self.should_norm = norm
        self.eps = eps
        self.log = log
        self.log_prob = log_prob
        self.t = 0
        self._eval = False
        self.mean = None
        self.std = None
        # The `np.bool` alias was deprecated in NumPy 1.20 and removed in
        # 1.24; the builtin `bool` yields the same boolean dtype on every
        # NumPy version.
        # NOTE(review): `self.num_envs` is not assigned in this method --
        # presumably it is provided by the parent class; confirm.
        self._dones = np.zeros(self.num_envs, dtype=bool)

        if mean is not None and std is not None:
            if not nest.has_same_structure(mean, std):
                raise ValueError("mean and std must have the same structure.")
            self.mean = mean
            # Clamp std from below by eps so no entry is smaller than eps.
            self.std = nest.map_structure(lambda x: np.maximum(x, self.eps),
                                          std)
Beispiel #3
0
    def _normalize(self, obs):
        """Return `obs` shifted by the stored mean and scaled by the std.

        When `self.should_norm` is false, `obs` is returned untouched.
        If either `self.mean` or `self.std` is still None,
        `self.find_norm_params()` is called before normalizing.

        Raises
        ------
        ValueError
            If `self.mean` and the converted `obs` differ in structure.
        """
        if not self.should_norm:
            return obs

        stats_missing = self.mean is None or self.std is None
        if stats_missing:
            self.find_norm_params()

        # Two passes, in this order: first turn every leaf into an array,
        # then cast each one to float32.
        as_arrays = nest.map_structure(np.asarray, obs)
        as_floats = nest.map_structure(np.float32, as_arrays)

        def _standardize(triple):
            value, mu, sigma = triple
            return (value - mu) / sigma

        if nest.has_same_structure(self.mean, as_floats):
            zipped = nest.zip_structure(as_floats, self.mean, self.std)
            return nest.map_structure(_standardize, zipped)
        raise ValueError("mean and obs do not have the same structure!")