def _advance(self):
    """Retire the current episode and pick the episode to track next.

    The reward state stored under the current episode id is deleted, then
    ``self._max_id()`` is asked for a candidate id:

    * no candidate (``None``): ``_current_episode_id`` is set to ``None``;
    * candidate for which ``env_status.compare_ids(finished, candidate)``
      is ``>= 0`` (logged as the rare "message for old environment received
      after new one" case): ``_current_episode_id`` is set to ``None``;
    * otherwise the candidate becomes the current episode and
      ``_drop_below`` is called with it.
    """
    finished_id = self._current_episode_id
    del self._reward_state[finished_id]

    if None in self._reward_state:
        # NOTE(review): on stdlib loggers `warn` is a deprecated alias of
        # `warning`; confirm what `extra_logger` is before switching.
        extra_logger.warn('[%s] WARNING: RewardBuffer: while advancing from %s, None was in reward state: %s', self.label, finished_id, self._reward_state)

    candidate_id = self._max_id()
    if candidate_id is None:
        # Nothing buffered to move on to.
        extra_logger.info('[%s] RewardBuffer advancing: setting episode_id=None until new data received (was episode_id=%s)', self.label, finished_id)
        self._current_episode_id = None
        return

    # Adopt the candidate before comparing so the log lines below report it.
    self._current_episode_id = candidate_id

    if env_status.compare_ids(finished_id, candidate_id) >= 0:
        extra_logger.info("[%s] RewardBuffer advancing: setting episode_id=None until new data received. Rare condition reached where message for old environment received after new one: completed_episode_id=%r self._current_episode_id=%r (%r). This is ok, but something we may want to fix in the future", self.label, finished_id, candidate_id, self._reward_state)
        self._current_episode_id = None
        return

    extra_logger.info('[%s] RewardBuffer advancing: has data for next episode: %s->%s', self.label, finished_id, candidate_id)
    # NOTE(review): confirm the stale-data drop is meant for this branch only.
    self._drop_below(candidate_id)
def _drop_below(self, episode_id, quiet=False):
    """Delete reward state for every stored id that compares below *episode_id*.

    A stored id is dropped when
    ``env_status.compare_ids(stored_id, episode_id) < 0``.

    Args:
        episode_id: The id that stored ids are compared against.
        quiet: When true, the drop is logged at debug level instead of info.
    """
    stale_ids = {
        key
        for key in self._reward_state
        if env_status.compare_ids(key, episode_id) < 0
    }
    if not stale_ids:
        # Nothing to log and nothing to delete.
        return

    emit = extra_logger.debug if quiet else extra_logger.info
    emit('[%s] RewardBuffer: dropping stale episode data: dropped=%s episode_id=%s', self.label, stale_ids, episode_id)

    # Keys were collected first so the dict is not mutated while iterating it.
    for key in stale_ids:
        del self._reward_state[key]