コード例 #1
0
    def step(self, action):
        """
        Advance the wrapped environment by one agent action.

        Before the agent's action is applied, any pending forced FIRE
        action and any remaining no-op steps are executed directly on the
        inner environment (``self.env.env``); their results are discarded.

        Args:
            action: indexable container whose first element is the action
                passed to the wrapped environment.

        Returns:
            A tuple ``(frames, reward, absorbing, info)`` where ``frames``
            is a ``LazyFrames`` built from the current frame history.

        """
        chosen_action = action[0]

        # Games with lives need a FIRE (action 1) to (re)start play.
        if self._force_fire:
            _frame, _, _, _ = self.env.env.step(1)
            self._force_fire = False

        # Consume the remaining no-op steps (action 0, presumably NOOP).
        while self._current_no_op > 0:
            _frame, _, _, _ = self.env.env.step(0)
            self._current_no_op -= 1

        frame, reward, absorbing, info = self.env.step(chosen_action)

        # Remember whether the underlying episode really ended, before the
        # life-loss logic below may override ``absorbing``.
        self._real_reset = absorbing

        lives_left = info['lives']
        if lives_left != self._lives:
            # Optionally treat a change in the life count as an episode end.
            if self._episode_ends_at_life:
                absorbing = True
            self._lives = lives_left
            action_names = self.env.unwrapped.get_action_meanings()
            self._force_fire = action_names[1] == 'FIRE'

        self._state.append(preprocess_frame(frame, self._img_size))
        frames = LazyFrames(list(self._state), self._history_length)

        return frames, reward, absorbing, info
コード例 #2
0
 def reset(self, state=None):
     """
     Reset the wrapped environment and rebuild the frame history.

     The first preprocessed frame is deep-copied ``self._history_length``
     times to fill a bounded deque stored in ``self._state``.

     Args:
         state: not used by this implementation.

     Returns:
         A ``LazyFrames`` built from the freshly filled frame history.

     """
     first_frame = preprocess_frame(self.env.reset(), self._img_size)
     copies = [deepcopy(first_frame) for _ in range(self._history_length)]
     self._state = deque(copies, maxlen=self._history_length)

     return LazyFrames(list(self._state), self._history_length)
コード例 #3
0
    def step(self, action):
        """
        Advance the wrapped environment by one action.

        The reward is converted to float, and any positive reward is
        replaced by ``1.``.

        Args:
            action: the action forwarded unchanged to the wrapped
                environment.

        Returns:
            A tuple ``(frames, reward, absorbing, info)`` where ``frames``
            is a ``LazyFrames`` built from the current frame history.

        """
        frame, raw_reward, absorbing, info = self.env.step(action)

        as_float = raw_reward * 1.  # Int to float
        # MiniGrid discounts rewards based on timesteps, but we need raw
        # rewards, so every positive reward is reported as exactly 1.
        reward = 1. if as_float > 0 else as_float

        self._state.append(preprocess_frame(frame, self._img_size))
        frames = LazyFrames(list(self._state), self._history_length)

        return frames, reward, absorbing, info
コード例 #4
0
    def reset(self, state=None):
        """
        Prepare the environment for a new episode.

        The wrapped environment is reset, and the frame history and life
        counter reinitialised, only when ``self._real_reset`` is set.
        Otherwise the existing frame history is kept. In both cases the
        forced-FIRE flag and the number of initial no-op steps are
        refreshed.

        Args:
            state: not used by this implementation.

        Returns:
            A ``LazyFrames`` built from the current frame history.

        """
        if self._real_reset:
            first_frame = preprocess_frame(self.env.reset(), self._img_size)
            copies = [
                deepcopy(first_frame) for _ in range(self._history_length)
            ]
            self._state = deque(copies, maxlen=self._history_length)
            self._lives = self._max_lives

        # A FIRE action is forced only if action 1 of the game is 'FIRE'.
        action_names = self.env.unwrapped.get_action_meanings()
        self._force_fire = action_names[1] == 'FIRE'

        # Uniformly sample how many no-op steps to run, from 0 up to and
        # including ``self._max_no_op_actions``.
        no_op_upper_bound = self._max_no_op_actions + 1
        self._current_no_op = np.random.randint(no_op_upper_bound)

        return LazyFrames(list(self._state), self._history_length)