Example #1
0
    def _advance(self):
        """Retire the current episode and pick the episode to track next.

        The entry for the current episode id is deleted from
        ``self._reward_state``. ``self._max_id()`` is then consulted:

        * If it returns ``None``, ``self._current_episode_id`` is reset to
          ``None``.
        * If the returned id does not compare strictly greater than the
          completed one (per ``env_status.compare_ids``), the current id is
          likewise reset to ``None``.
        * Otherwise the returned id becomes the current episode and every
          entry that compares below it is dropped via ``_drop_below``.

        Raises:
            KeyError: if the current episode id has no entry in
                ``self._reward_state``.
        """
        completed_episode_id = self._current_episode_id
        del self._reward_state[completed_episode_id]

        if None in self._reward_state:
            # ``Logger.warn`` is a deprecated alias; ``warning`` is the
            # supported spelling.
            extra_logger.warning(
                '[%s] WARNING: RewardBuffer: while advancing from %s, None was in reward state: %s',
                self.label, completed_episode_id, self._reward_state)

        # NOTE(review): presumably the greatest episode id still present in
        # the reward state -- confirm against _max_id.
        max_id = self._max_id()
        if max_id is None:
            extra_logger.info(
                '[%s] RewardBuffer advancing: setting episode_id=None until new data received (was episode_id=%s)',
                self.label, completed_episode_id)
            self._current_episode_id = None
            return

        # Assigned before the comparison so the log call below reports the
        # candidate id through the attribute, exactly as named in the message.
        self._current_episode_id = max_id
        if env_status.compare_ids(completed_episode_id,
                                  self._current_episode_id) >= 0:
            # The only remaining data is not newer than the episode that just
            # finished, so there is nothing valid to advance to.
            extra_logger.info(
                "[%s] RewardBuffer advancing: setting episode_id=None until new data received. Rare condition reached where message for old environment received after new one: completed_episode_id=%r self._current_episode_id=%r (%r). This is ok, but something we may want to fix in the future",
                self.label, completed_episode_id, self._current_episode_id,
                self._reward_state)
            self._current_episode_id = None
            return

        extra_logger.info(
            '[%s] RewardBuffer advancing: has data for next episode: %s->%s',
            self.label, completed_episode_id, self._current_episode_id)
        self._drop_below(self._current_episode_id)
Example #2
0
    def _drop_below(self, episode_id, quiet=False):
        """Delete every reward-state entry that compares below ``episode_id``.

        Args:
            episode_id: Threshold id; an entry is removed when
                ``env_status.compare_ids(entry_id, episode_id)`` is negative.
            quiet: When true, the removal is logged at debug level instead of
                info level.
        """
        # Collect first: the mapping must not change size while it is being
        # iterated.
        stale_ids = {
            key for key in self._reward_state
            if env_status.compare_ids(key, episode_id) < 0
        }
        if not stale_ids:
            return

        emit = extra_logger.debug if quiet else extra_logger.info
        emit('[%s] RewardBuffer: dropping stale episode data: dropped=%s episode_id=%s', self.label, stale_ids, episode_id)

        for key in stale_ids:
            del self._reward_state[key]