Example #1
    def _flip_past(self, observation_n, reward_n, done_n, info):
        # Wait until all observations are past the corresponding reset times
        remote_target_time = [
            info_i['reward_buffer.remote_time'] for info_i in info['n']
        ]
        while True:
            new_observation_n, new_reward_n, new_done_n, new_info = self.env.step(
                [[] for i in range(self.n)])

            # info_i['diagnostics.image_remote_time'] may not exist, for example when an env
            # is resetting. target is a timestamp, thus > 0, so those envs will count as "need to catch up"
            deltas = [
                target - info_i.get('diagnostics.image_remote_time', 0)
                for target, info_i in zip(remote_target_time, new_info['n'])
            ]
            count = len([d for d in deltas if d > 0])

            rewarder.merge_n(observation_n, reward_n, done_n, info,
                             new_observation_n, new_reward_n, new_done_n,
                             new_info)

            if count == 0:
                return
            else:
                logger.debug(
                    '[GymCoreSync] Still waiting on %d envs to catch up to their targets: %s',
                    count, deltas)
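Every example on this page funnels its state through rewarder.merge_n, whose own source is not listed here. The sketch below shows the merging behavior these wrappers appear to rely on (non-None observations from the second batch replace the first's, rewards accumulate, done flags are OR-ed, and per-env metadata dicts are merged); it is illustrative only, and the exact semantics in universe's rewarder module may differ.

    def merge_n(observation_n, reward_n, done_n, info,
                new_observation_n, new_reward_n, new_done_n, new_info):
        # Sketch only: fold the second batch into the first, in place.
        for i in range(len(observation_n)):
            if new_observation_n[i] is not None:
                observation_n[i] = new_observation_n[i]   # non-None frames replace
            reward_n[i] += new_reward_n[i]                 # rewards accumulate
            done_n[i] = done_n[i] or new_done_n[i]         # done is sticky
            info['n'][i].update(new_info['n'][i])          # merge per-env metadata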
Example #2
    def _step(self, action_n):
        # Add C keypress in order to "commit" the action, as
        # interpreted by the remote.
        action_n = [action + [
            spaces.KeyEvent.by_name('c', down=True),
            spaces.KeyEvent.by_name('c', down=False)
        ] for action in action_n]

        observation_n, reward_n, done_n, info = self.env.step(action_n)
        if self.reward_n is not None:
            rewarder.merge_n(
                observation_n, reward_n, done_n, info,
                [None] * self.n, self.reward_n, self.done_n, self.info,
            )
            self.reward_n = self.done_n = self.info = None

        while True:
            count = len([True for info_i in info['n'] if info_i['stats.reward.count'] == 0])
            if count > 0:
                logger.debug('[GymCoreSync] Still waiting on %d envs to receive their post-commit reward', count)
            else:
                break

            new_observation_n, new_reward_n, new_done_n, new_info = self.env.step([[] for i in range(self.n)])
            rewarder.merge_n(
                observation_n, reward_n, done_n, info,
                new_observation_n, new_reward_n, new_done_n, new_info
            )

        assert all(info_i['stats.reward.count'] == 1 for info_i in info['n']), "Expected all stats.reward.counts to be 1: {}".format(info)

        # Fast forward until the observation is caught up with the rewarder
        self._flip_past(observation_n, reward_n, done_n, info)
        return observation_n, reward_n, done_n, info
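The wrapper above assumes each incoming action is a list of VNC events, to which the commit keypress pair can be appended. A rough usage sketch follows; the PointerEvent click and its coordinates are illustrative, not taken from this page.

    from universe import spaces

    # One env's action is a list of VNC events; the wrapper appends a 'c'
    # press/release pair so the remote commits the action.
    action = [spaces.PointerEvent(200, 300, buttonmask=1)]   # illustrative click
    action += [
        spaces.KeyEvent.by_name('c', down=True),
        spaces.KeyEvent.by_name('c', down=False),
    ]
    # The wrapper then keeps issuing no-op steps and merging the results until
    # every env reports info['n'][i]['stats.reward.count'] == 1 for the commit.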
Example #3
    def _step(self, action_n):
        observation_n, reward_n, done_n, info = self.env.step(action_n)
        if self.reward_n is not None:
            rewarder.merge_n(
                observation_n, reward_n, done_n, info,
                [None] * self.n, self.reward_n, self.done_n, self.info,
            )
            self.reward_n = self.done_n = self.info = None
        return self._observation(done_n, info), reward_n, done_n, info
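Examples #4 and #5 below show the matching _reset: reward, done, and info produced while resetting are parked on self.reward_n / self.done_n / self.info, and the _step above folds them into the first post-reset step (the [None] * self.n observations mean the fresh frames are kept). A hedged sketch of the agent-side effect, assuming a vectorized wrapper env with an n attribute (names are illustrative):

    observation_n = env.reset()                  # reward earned while resetting is parked
    observation_n, reward_n, done_n, info = env.step(
        [[] for _ in range(env.n)])              # parked reward/done/info merged in here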
Example #4
    def _reset(self):
        observation_n = self.env.reset()
        self.reward_n = [0] * self.n
        self.done_n = [False] * self.n
        self.info = {'n': [{} for _ in range(self.n)]}
        new_observation_n, new_reward_n, new_done_n, new_info = self.env.step(
            [[] for i in range(self.n)])
        rewarder.merge_n(observation_n, self.reward_n, self.done_n, self.info,
                         new_observation_n, new_reward_n, new_done_n, new_info)
        return self._observation(self.done_n, self.info)
Example #5
    def _reset(self):
        observation_n = self.env.reset()
        self.reward_n = [0] * self.n
        self.done_n = [False] * self.n
        self.info = {'n': [{} for _ in range(self.n)]}
        new_observation_n, new_reward_n, new_done_n, new_info = self.env.step([[] for i in range(self.n)])
        rewarder.merge_n(
            observation_n, self.reward_n, self.done_n, self.info,
            new_observation_n, new_reward_n, new_done_n, new_info
        )
        return self._observation(self.done_n, self.info)
Example #6
    def _step(self, action_n):
        # Add C keypress in order to "commit" the action, as
        # interpreted by the remote.
        action_n = [
            action + [
                spaces.KeyEvent.by_name('c', down=True),
                spaces.KeyEvent.by_name('c', down=False)
            ] for action in action_n
        ]

        observation_n, reward_n, done_n, info = self.env.step(action_n)
        if self.reward_n is not None:
            rewarder.merge_n(
                observation_n,
                reward_n,
                done_n,
                info,
                [None] * self.n,
                self.reward_n,
                self.done_n,
                self.info,
            )
            self.reward_n = self.done_n = self.info = None

        while True:
            count = len([
                True for info_i in info['n']
                if info_i['stats.reward.count'] == 0
            ])
            if count > 0:
                logger.debug(
                    '[GymCoreSync] Still waiting on %d envs to receive their post-commit reward',
                    count)
            else:
                break

            new_observation_n, new_reward_n, new_done_n, new_info = self.env.step(
                [[] for i in range(self.n)])
            rewarder.merge_n(observation_n, reward_n, done_n, info,
                             new_observation_n, new_reward_n, new_done_n,
                             new_info)

        assert all(
            info_i['stats.reward.count'] == 1 for info_i in info['n']
        ), "Expected all stats.reward.counts to be 1: {}".format(info)

        # Fast forward until the observation is caught up with the rewarder
        self._flip_past(observation_n, reward_n, done_n, info)
        return observation_n, reward_n, done_n, info
Example #7
    def _step(self, action_n):
        observation_n, reward_n, done_n, info = self.env.step(action_n)
        if self.reward_n is not None:
            rewarder.merge_n(
                observation_n,
                reward_n,
                done_n,
                info,
                [None] * self.n,
                self.reward_n,
                self.done_n,
                self.info,
            )
            self.reward_n = self.done_n = self.info = None
        return self._observation(done_n, info), reward_n, done_n, info
Example #8
    def _flip_past(self, observation_n, reward_n, done_n, info):
        # Wait until all observations are past the corresponding reset times
        remote_target_time = [info_i['reward_buffer.remote_time'] for info_i in info['n']]
        while True:
            new_observation_n, new_reward_n, new_done_n, new_info = self.env.step([[] for i in range(self.n)])

            # info_i['diagnostics.image_remote_time'] may not exist, for example when an env
            # is resetting. target is a timestamp, thus > 0, so those envs will count as "need to catch up"
            deltas = [target - info_i.get('diagnostics.image_remote_time', 0) for target, info_i in zip(remote_target_time, new_info['n'])]
            count = len([d for d in deltas if d > 0])

            rewarder.merge_n(
                observation_n, reward_n, done_n, info,
                new_observation_n, new_reward_n, new_done_n, new_info
            )

            if count == 0:
                return
            else:
                logger.debug('[GymCoreSync] Still waiting on %d envs to catch up to their targets: %s', count, deltas)
Example #9
    def _reset(self):
        observation_n = self.env.reset()
        self.reward_n = [0] * self.n
        self.done_n = [False] * self.n
        self.info = {'n': [{} for _ in range(self.n)]}

        while any(ob is None for ob in observation_n):
            action_n = []
            for done in self.done_n:
                if done:
                    # No popping of reward/done. Don't want to merge across episode boundaries.
                    action_n.append([spaces.PeekReward])
                else:
                    action_n.append([])
            new_observation_n, new_reward_n, new_done_n, new_info = self.env.step(
                action_n)
            rewarder.merge_n(observation_n, self.reward_n, self.done_n,
                             self.info, new_observation_n, new_reward_n,
                             new_done_n, new_info)
        return observation_n
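Here spaces.PeekReward acts as a sentinel action: an env that is already done should report its latest reward state without that state being popped, so nothing gets merged across an episode boundary. The actual universe reward-buffer API is not shown on this page; the class below is only an illustrative sketch of the peek-versus-pop distinction.

    class RewardBufferSketch:
        """Illustrative only: accumulate rewards and hand them out on pop()."""

        def __init__(self):
            self.reward = 0.0
            self.done = False

        def push(self, reward, done):
            self.reward += reward
            self.done = self.done or done

        def pop(self, peek=False):
            reward, done = self.reward, self.done
            if not peek:
                # Popping clears the buffer; peeking leaves it intact, so a
                # finished episode's reward is never merged into the next one.
                self.reward, self.done = 0.0, False
            return reward, done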
Example #10
    def _step(self, action_n):
        observation_n, reward_n, done_n, info = self.env.step(action_n)
        if self.reward_n is not None:
            rewarder.merge_n(observation_n, reward_n, done_n, info,
                             [None] * self.n, self.reward_n, self.done_n,
                             self.info)
            self.reward_n = self.done_n = self.info = None

        while any(ob is None for ob in observation_n):
            action_n = []
            for done in done_n:
                if done:
                    # No popping of reward/done. Don't want to merge across episode boundaries.
                    action_n.append([spaces.PeekReward])
                else:
                    action_n.append([])
            new_observation_n, new_reward_n, new_done_n, new_info = self.env.step(
                action_n)
            rewarder.merge_n(observation_n, reward_n, done_n, info,
                             new_observation_n, new_reward_n, new_done_n,
                             new_info)
        return observation_n, reward_n, done_n, info
Example #11
    def _step(self, action_n):
        if self._steps is None:
            self._start_timer()
        self._steps += 1

        accum_observation_n, accum_reward_n, accum_done_n, accum_info = self._substep(
            action_n)
        accum_info['throttle.action.available_at'] = time.time()

        # Record which indexes we were just peeking at, so when we
        # make the follow-up we'll be sure to peek there too.
        peek_n = [
            any(peek is spaces.PeekReward for peek in action) for action in action_n
        ]

        if self.fps is None:
            return accum_observation_n, accum_reward_n, accum_done_n, accum_info

        accum_info['stats.throttle.sleep'] = 0
        while True:
            # See how much time we have to idle
            delta = self._start + 1. / self.fps * self._steps - time.time()

            # The following assumes that our control loop
            if delta < 0:
                # We're out of time. Just get out of here.
                delta = abs(delta)
                if delta >= 1:
                    logger.info(
                        'Throttle fell behind by %.2fs; lost %.2f frames',
                        delta, self.fps * delta)
                pyprofile.timing('vnc_env.Throttle.lost_sleep', delta)
                self._start_timer()
                break
            # elif delta < 0.008:
            #     # Only have 8ms. Let's spend it sleeping, and
            #     # return an image which may have up to an
            #     # additional 8ms lag.
            #     #
            #     # 8ms is reasonably arbitrary; we just want something
            #     # that's small where it's not actually going to help
            #     # if we make another step call. Step with 32 parallel
            #     # envs takes about 6ms (about half of which is
            #     # diagnostics, which could be totally async!), so 8 is
            #     # a reasonable choice for now..
            #     pyprofile.timing('vnc_env.Throttle.sleep', delta)
            #     accum_info['stats.throttle.sleep'] += delta
            #     time.sleep(delta)
            #     break
            else:
                # We've got plenty of time. Sleep for up to 16ms, and
                # then refresh our current frame. We need to
                # constantly be calling step so that our lags are
                # reported correctly, within 16ms. (The layering is
                # such that the vncdriver doesn't know which pixels
                # correspond to metadata, and the diagnostics don't
                # know when pixels first got painted. So we do our
                # best to present frames as they're ready to the
                # diagnostics.)
                delta = min(delta, 0.016)
                pyprofile.timing('vnc_env.Throttle.sleep', delta)
                accum_info['stats.throttle.sleep'] += delta
                time.sleep(delta)

                # We want to merge in the latest reward/done/info so that our
                # agent has the most up-to-date info post-sleep, but also want
                # to avoid popping any rewards where done=True (since we'd
                # have to merge across episode boundaries).
                action_n = []
                for done, peek in zip(accum_done_n, peek_n):
                    if done or peek:
                        # No popping of reward/done
                        action_n.append([spaces.PeekReward])
                    else:
                        action_n.append([])

                observation_n, reward_n, done_n, info = self._substep(action_n)

                # Merge observations, rewards, and metadata.
                # Text observations preserve the order in which the messages were sent.
                rewarder.merge_n(
                    accum_observation_n,
                    accum_reward_n,
                    accum_done_n,
                    accum_info,
                    observation_n,
                    reward_n,
                    done_n,
                    info,
                )

        return accum_observation_n, accum_reward_n, accum_done_n, accum_info
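The throttle's bookkeeping reduces to one expression: the target wall-clock time for step k is self._start + k / self.fps, and delta is how far ahead of (positive) or behind (negative) that target the loop currently is. A standalone sketch of the same computation, with illustrative names:

    import time

    def throttle_delta(start, steps, fps, now=None):
        # Positive: spare time, which the wrapper spends sleeping in <= 16 ms
        # slices, re-stepping between slices so reported lag stays accurate.
        # Negative: the loop has fallen behind; the wrapper logs the lost
        # frames and restarts its timer instead of sleeping.
        now = time.time() if now is None else now
        return start + float(steps) / fps - now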
Example #12
    def _step(self, action_n):
        if self._steps is None:
            self._start_timer()
        self._steps += 1

        accum_observation_n, accum_reward_n, accum_done_n, accum_info = self._substep(action_n)
        accum_info['throttle.action.available_at'] = time.time()

        # Record which indexes we were just peeking at, so when we
        # make the follow-up we'll be sure to peek there too.
        peek_n = [any(peek is spaces.PeekReward for peek in action) for action in action_n]

        if self.fps is None:
            return accum_observation_n, accum_reward_n, accum_done_n, accum_info

        accum_info['stats.throttle.sleep'] = 0
        while True:
            # See how much time we have to idle
            delta = self._start + 1./self.fps * self._steps - time.time()

            # The following assumes that our control loop
            if delta < 0:
                # We're out of time. Just get out of here.
                delta = abs(delta)
                if delta >= 1:
                    logger.info('Throttle fell behind by %.2fs; lost %.2f frames', delta, self.fps*delta)
                pyprofile.timing('vnc_env.Throttle.lost_sleep', delta)
                self._start_timer()
                break
            # elif delta < 0.008:
            #     # Only have 8ms. Let's spend it sleeping, and
            #     # return an image which may have up to an
            #     # additional 8ms lag.
            #     #
            #     # 8ms is reasonably arbitrary; we just want something
            #     # that's small where it's not actually going to help
            #     # if we make another step call. Step with 32 parallel
            #     # envs takes about 6ms (about half of which is
            #     # diagnostics, which could be totally async!), so 8 is
            #     # a reasonable choice for now..
            #     pyprofile.timing('vnc_env.Throttle.sleep', delta)
            #     accum_info['stats.throttle.sleep'] += delta
            #     time.sleep(delta)
            #     break
            else:
                # We've got plenty of time. Sleep for up to 16ms, and
                # then refresh our current frame. We need to
                # constantly be calling step so that our lags are
                # reported correctly, within 16ms. (The layering is
                # such that the vncdriver doesn't know which pixels
                # correspond to metadata, and the diagnostics don't
                # know when pixels first got painted. So we do our
                # best to present frames as they're ready to the
                # diagnostics.)
                delta = min(delta, 0.016)
                pyprofile.timing('vnc_env.Throttle.sleep', delta)
                accum_info['stats.throttle.sleep'] += delta
                time.sleep(delta)

                # We want to merge in the latest reward/done/info so that our
                # agent has the most up-to-date info post-sleep, but also want
                # to avoid popping any rewards where done=True (since we'd
                # have to merge across episode boundaries).
                action_n = []
                for done, peek in zip(accum_done_n, peek_n):
                    if done or peek:
                        # No popping of reward/done
                        action_n.append([spaces.PeekReward])
                    else:
                        action_n.append([])

                observation_n, reward_n, done_n, info = self._substep(action_n)

                # Merge observations, rewards, and metadata.
                # Text observations preserve the order in which the messages were sent.
                rewarder.merge_n(
                    accum_observation_n, accum_reward_n, accum_done_n, accum_info,
                    observation_n, reward_n, done_n, info,
                )

        return accum_observation_n, accum_reward_n, accum_done_n, accum_info