Beispiel #1
0
 def add_sample(self, observation, action, reward, terminal, initial):
     """Write one sample into the buffer at the current ``_top`` index.

     ``check_replacement()`` is invoked before anything is written and
     ``advance()`` after all fields are stored.

     Args:
         observation: Value stored in ``_observations``.
         action: Value stored in ``_actions``. When
             ``_is_action_discrete`` is set and this is not already an
             integer scalar, it is first passed through
             ``special.from_onehot`` (presumably a one-hot vector being
             turned into an index -- confirm against ``special``).
         reward: Value stored in ``_rewards``.
         terminal: Value stored in ``_terminals``.
         initial: Value stored in ``_initials``.
     """
     self.check_replacement()
     # Read the slot only after check_replacement(), matching the order in
     # which the fields were originally indexed.
     slot = self._top
     self._observations[slot] = observation
     # np.integer covers every NumPy integer scalar (int32, int64, uint8,
     # ...), not just int64, so integer actions are never mistaken for
     # one-hot vectors.
     already_index = isinstance(action, (int, np.integer))
     if self._is_action_discrete and not already_index:
         action = special.from_onehot(action)
     self._actions[slot] = action
     self._rewards[slot] = reward
     self._terminals[slot] = terminal
     self._initials[slot] = initial
     self.advance()
Beispiel #2
0
    def add_sample(self, observation, action, reward, terminal, initial):
        """
        Add a sample to the current replay buffer.

        The sample is written at index ``self._top``.
        ``check_replacement()`` runs before the write and ``advance()``
        runs after it.

        Parameters
        ----------
        observation
            Value stored in ``_observations``.
        action
            Value stored in ``_actions``. If ``_is_action_discrete`` is
            set and ``action`` is not already an integer scalar, it is
            converted with ``special.from_onehot`` first (presumably from
            a one-hot vector to an index -- confirm against ``special``).
        reward
            Value stored in ``_rewards``.
        terminal
            Value stored in ``_terminals``.
        initial
            Value stored in ``_initials``.
        """
        self.check_replacement()
        self._observations[self._top] = observation
        if self._is_action_discrete:
            # Accept any NumPy integer scalar (int32, int64, uint8, ...)
            # as an already-decoded action; only non-integer inputs are
            # sent through the one-hot conversion.
            if not isinstance(action, (int, np.integer)):
                action = special.from_onehot(action)
        self._actions[self._top] = action
        self._rewards[self._top] = reward
        self._terminals[self._top] = terminal
        self._initials[self._top] = initial
        self.advance()
Beispiel #3
0
 def unflatten(self, x):
     """Return ``x`` converted by ``special.from_onehot``.

     This is a thin delegation; ``self`` is not consulted.
     NOTE(review): presumably ``x`` is a one-hot encoded vector -- confirm
     against the ``special`` module.
     """
     decoded = special.from_onehot(x)
     return decoded