コード例 #1
0
    def _flip_past(self, observation_n, reward_n, done_n, info):
        """Block until every env's observation has caught up with its reset-time target.

        Repeatedly issues no-op steps, merging each step's results into the
        caller's (observation_n, reward_n, done_n, info), until no env's
        observation timestamp lags its 'reward_buffer.remote_time' target.
        """
        # Snapshot the per-env timestamps we must catch up to.
        targets = [info_i['reward_buffer.remote_time'] for info_i in info['n']]

        while True:
            step_obs, step_reward, step_done, step_info = self.env.step(
                [[] for _ in range(self.n)])

            # 'diagnostics.image_remote_time' may be missing (e.g. while an env
            # is resetting). The target is a timestamp, hence > 0, so a missing
            # value defaults to 0 and still counts as "needs to catch up".
            lags = [
                target - info_i.get('diagnostics.image_remote_time', 0)
                for target, info_i in zip(targets, step_info['n'])
            ]
            behind = sum(1 for lag in lags if lag > 0)

            rewarder.merge_n(observation_n, reward_n, done_n, info,
                             step_obs, step_reward, step_done, step_info)

            if behind == 0:
                return
            logger.debug(
                '[LabCoreSync] Still waiting on %d envs to catch up to their targets: %s',
                behind, lags)
コード例 #2
0
 def _step(self, action_n):
     """Step the wrapped env, folding in any reward/done/info buffered by reset."""
     observation_n, reward_n, done_n, info = self.env.step(action_n)

     buffered = self.reward_n is not None
     if buffered:
         # Apply the rewards accumulated during reset exactly once, then
         # clear the buffers so they cannot be merged again.
         rewarder.merge_n(observation_n, reward_n, done_n, info,
                          [None] * self.n, self.reward_n, self.done_n,
                          self.info)
         self.reward_n = self.done_n = self.info = None

     return self._observation(done_n, info), reward_n, done_n, info
コード例 #3
0
 def _reset(self):
     """Reset the wrapped env and seed the reward/done/info buffers with one no-op step."""
     observation_n = self.env.reset()

     # Fresh per-env accumulators.
     self.reward_n = [0] * self.n
     self.done_n = [False] * self.n
     self.info = {'n': [{} for _ in range(self.n)]}

     # Take one no-op step and fold its results into the buffers so nothing
     # emitted during the reset window is lost.
     noop_n = [[] for _ in range(self.n)]
     step_obs, step_reward, step_done, step_info = self.env.step(noop_n)
     rewarder.merge_n(observation_n, self.reward_n, self.done_n, self.info,
                      step_obs, step_reward, step_done, step_info)

     return self._observation(self.done_n, self.info)
コード例 #4
0
    def _step(self, action_n):
        """Step with a trailing 'c' keypress committing each action.

        After stepping, folds in any reward buffered by reset, then blocks
        until every env has registered exactly one post-commit reward and
        the observations have caught up with the rewarder.
        """
        # The remote interprets the 'c' press/release pair as "commit";
        # build the pair per-action so each env gets its own event objects.
        action_n = [
            action + [spaces.KeyEvent.by_name('c', down=True),
                      spaces.KeyEvent.by_name('c', down=False)]
            for action in action_n
        ]

        observation_n, reward_n, done_n, info = self.env.step(action_n)

        if self.reward_n is not None:
            # Apply rewards buffered during reset exactly once.
            rewarder.merge_n(observation_n, reward_n, done_n, info,
                             [None] * self.n, self.reward_n, self.done_n,
                             self.info)
            self.reward_n = self.done_n = self.info = None

        # Poll with no-op steps until every env has a reward for the commit.
        while True:
            pending = sum(
                1 for info_i in info['n']
                if info_i['stats.reward.count'] == 0)
            if pending == 0:
                break
            logger.debug(
                '[LabCoreSync] Still waiting on %d envs to receive their post-commit reward',
                pending)

            step_obs, step_reward, step_done, step_info = self.env.step(
                [[] for _ in range(self.n)])
            rewarder.merge_n(observation_n, reward_n, done_n, info,
                             step_obs, step_reward, step_done, step_info)

        assert all(
            info_i['stats.reward.count'] == 1 for info_i in info['n']
        ), "Expected all stats.reward.counts to be 1: {}".format(info)

        # Fast forward until the observation is caught up with the rewarder
        self._flip_past(observation_n, reward_n, done_n, info)
        return observation_n, reward_n, done_n, info
コード例 #5
0
ファイル: blocking_reset.py プロジェクト: SynthAI/SynthAI
    def _reset(self):
        """Reset and block until every env delivers a real (non-None) observation.

        Rewards arriving while we wait are merged into the instance buffers
        so a later _step can fold them in.
        """
        observation_n = self.env.reset()
        self.reward_n = [0] * self.n
        self.done_n = [False] * self.n
        self.info = {'n': [{} for _ in range(self.n)]}

        # An env reports None until its first observation is available.
        while any(ob is None for ob in observation_n):
            # Peek (rather than pop) rewards on done envs so we never merge
            # reward/done across an episode boundary.
            action_n = [[spaces.PeekReward] if done else []
                        for done in self.done_n]
            step_obs, step_reward, step_done, step_info = self.env.step(
                action_n)
            rewarder.merge_n(observation_n, self.reward_n, self.done_n,
                             self.info, step_obs, step_reward, step_done,
                             step_info)
        return observation_n
コード例 #6
0
ファイル: blocking_reset.py プロジェクト: SynthAI/SynthAI
    def _step(self, action_n):
        """Step, fold in reset-buffered rewards, then block until every env
        has produced a real (non-None) observation."""
        observation_n, reward_n, done_n, info = self.env.step(action_n)

        if self.reward_n is not None:
            # Apply rewards buffered during reset exactly once.
            rewarder.merge_n(observation_n, reward_n, done_n, info,
                             [None] * self.n, self.reward_n, self.done_n,
                             self.info)
            self.reward_n = self.done_n = self.info = None

        while any(ob is None for ob in observation_n):
            # Peek (rather than pop) rewards on done envs so we never merge
            # reward/done across an episode boundary.
            poll_n = [[spaces.PeekReward] if done else []
                      for done in done_n]
            step_obs, step_reward, step_done, step_info = self.env.step(
                poll_n)
            rewarder.merge_n(observation_n, reward_n, done_n, info,
                             step_obs, step_reward, step_done, step_info)

        return observation_n, reward_n, done_n, info