def onMessage(self, payload, isBinary):
    """Handle one incoming websocket message from the agent.

    Decodes the JSON payload, records protocol counters and latency
    timings, and dispatches on the message's 'method' field: env resets
    go to the control buffer, pings get an immediate reply, and anything
    else is logged as unsupported.
    """
    if not self.factory.agent_conn.check_message(self):
        return
    assert not isBinary, "Binary websocket not supported"
    payload = ujson.loads(payload)

    context = {
        'start': time.time(),
        'conn': self,
    }
    # One-way latency based on the sender's timestamp; negative values
    # indicate clock skew between the two hosts.
    latency = context['start'] - payload['headers']['sent_at']
    pyprofile.incr('rewarder_protocol.messages')
    pyprofile.incr('rewarder_protocol.messages.{}'.format(payload['method']))
    # Double the one-way latency to approximate round-trip time.
    pyprofile.timing('rewarder_protocol.latency.rtt.skew_unadjusted', 2 * latency)
    if latency < 0:
        pyprofile.incr('rewarder_protocol.latency.rtt.skew_unadjusted.negative')

    if payload['method'] == 'v0.env.reset':
        logger.info('Received reset message: %s', payload)
        self.factory.agent_conn.control_buffer.recv_rpc(context, payload)
    elif payload['method'] == 'v0.control.ping':
        logger.debug('Received ping message: %s', payload)
        parent_message_id = payload['headers']['message_id']
        headers = {'parent_message_id': parent_message_id}
        self.send_message('v0.reply.control.ping', {}, headers)
    else:
        # Fix: logger.warn is a deprecated alias; use logger.warning.
        logger.warning('Received unsupported message: %s', payload)
def __call__(self, img, available_at):
    """Schedule async QR decoding of the metadata region of *img*.

    Returns the most recent completed decode result, None while a decode
    is still in flight, or False when no result is pending. Identical
    consecutive frames are served from cache rather than re-scheduled.
    """
    # Choose the return value
    if len(self.deque) > 0 and self.deque[0].ready():
        last = self.deque.popleft()
        res = last.get()
        if res is not None:
            pyprofile.timing('vnc_env.diagnostics.async_decode.latency',
                             time.time() - res['available_at'])
    else:
        res = False
    pyprofile.gauge('vnc_env.diagnostics.async_decode.queue_depth', len(self.deque))

    # Just grayscale it by keeping only one component. Should be
    # good enough as this region is black and white anyway.
    grayscale = img[self.y:self.y+self.height, self.x:self.x+self.width, 0]

    # Apply processing if needed
    match = np.array_equal(self._last_img, grayscale)
    if not match:
        pyprofile.incr('vnc_env.diagnostics.async_decode.schedule')
        # sneakily copy if numpy hasn't, so it can be cached
        self._last_img = np.ascontiguousarray(grayscale)
        # Fix: the original named this variable `async`, which is a
        # reserved keyword since Python 3.7 and a SyntaxError there.
        async_result = self.qr_pool.apply_async(
            self.method, (self._last_img, time.time(), available_at))
        self.deque.append(async_result)
    else:
        pyprofile.incr('vnc_env.diagnostics.async_decode.cache_hit')
    return res
def __call__(self, img, available_at):
    """Schedule async QR decoding of the metadata region of *img*.

    Returns the most recent completed decode result, None while a decode
    is still in flight, or False when no result is pending. Identical
    consecutive frames are served from cache rather than re-scheduled.
    """
    # Choose the return value
    if len(self.deque) > 0 and self.deque[0].ready():
        last = self.deque.popleft()
        res = last.get()
        if res is not None:
            pyprofile.timing('vnc_env.diagnostics.async_decode.latency',
                             time.time() - res['available_at'])
    else:
        res = False
    pyprofile.gauge('vnc_env.diagnostics.async_decode.queue_depth', len(self.deque))

    # Just grayscale it by keeping only one component. Should be
    # good enough as this region is black and white anyway.
    grayscale = img[self.y:self.y + self.height, self.x:self.x + self.width, 0]

    # Apply processing if needed
    match = np.array_equal(self._last_img, grayscale)
    if not match:
        pyprofile.incr('vnc_env.diagnostics.async_decode.schedule')
        # sneakily copy if numpy hasn't, so it can be cached
        self._last_img = np.ascontiguousarray(grayscale)
        # Fix: the original named this variable `async`, which is a
        # reserved keyword since Python 3.7 and a SyntaxError there.
        async_result = self.qr_pool.apply_async(
            self.method, (self._last_img, time.time(), available_at))
        self.deque.append(async_result)
    else:
        pyprofile.incr('vnc_env.diagnostics.async_decode.cache_hit')
    return res
def _step(self, action_n):
    """Step the wrapped env, recording how long the step spent doing
    real work (i.e. excluding any throttle-induced sleep reported by a
    lower layer via 'stats.throttle.sleep')."""
    began = time.time()
    with pyprofile.push('vnc_env.Timer.step'):
        observation_n, reward_n, done_n, info = self.env.step(action_n)

    # Treat a missing or negative sleep report as zero sleep.
    slept = info.get('stats.throttle.sleep')
    slept = slept if (slept is not None and slept >= 0) else 0
    pyprofile.timing('vnc_env.Timer.step.excluding_sleep',
                     time.time() - began - slept)
    return observation_n, reward_n, done_n, info
def onMessage(self, payload, isBinary):
    """Decode an incoming rewarder message, record protocol/latency
    metrics, and hand the parsed payload to self.recv for dispatch."""
    extra_logger.debug('[%s] Received payload: %s', self.factory.label, payload)
    assert not isBinary
    payload = ujson.loads(payload)

    context = self._make_context()
    one_way_latency = context['start'] - payload['headers']['sent_at']

    pyprofile.incr('rewarder_protocol.messages')
    pyprofile.incr('rewarder_protocol.messages.{}'.format(payload['method']))
    # Double latency to model RTT
    pyprofile.timing('rewarder_protocol.latency.rtt.skew_unadjusted',
                     2 * one_way_latency)
    if one_way_latency < 0:
        # A negative value means the peer's clock is ahead of ours.
        pyprofile.incr('rewarder_protocol.latency.rtt.skew_unadjusted.negative')

    self.recv(context, payload)
def _step(self, action_n):
    """Step the wrapped env, then idle until the next frame deadline.

    While idling, keeps calling _substep (with peek-only actions) so that
    lag diagnostics stay current, merging any rewards/metadata that
    arrive during the sleep into the accumulated return values.
    """
    if self._steps is None:
        self._start_timer()
    self._steps += 1

    accum_observation_n, accum_reward_n, accum_done_n, accum_info = self._substep(action_n)
    accum_info['throttle.action.available_at'] = time.time()

    # Record which indexes we were just peeking at, so when we
    # make the follow-up we'll be sure to peek there too.
    #
    # Fix: the original wrote `any(spaces.PeekReward for peek in action)`,
    # which yields the (always-truthy) PeekReward object once per event and
    # so was True for ANY non-empty action. Compare each event against the
    # sentinel instead.
    peek_n = [any(peek is spaces.PeekReward for peek in action) for action in action_n]

    if self.fps is None:
        return accum_observation_n, accum_reward_n, accum_done_n, accum_info

    accum_info['stats.throttle.sleep'] = 0
    while True:
        # See how much time we have to idle
        delta = self._start + 1. / self.fps * self._steps - time.time()
        if delta < 0:
            # We're out of time. Just get out of here.
            delta = abs(delta)
            if delta >= 1:
                logger.info('Throttle fell behind by %.2fs; lost %.2f frames',
                            delta, self.fps * delta)
            pyprofile.timing('vnc_env.Throttle.lost_sleep', delta)
            self._start_timer()
            break
        else:
            # NOTE(review): the original carried a disabled fast-path for
            # very short (<8ms) deltas that slept the remainder rather
            # than re-stepping; it stays omitted here.
            #
            # We've got plenty of time. Sleep for up to 16ms, and
            # then refresh our current frame. We need to constantly be
            # calling step so that our lags are reported correctly,
            # within 16ms. (The vncdriver doesn't know which pixels
            # correspond to metadata, and the diagnostics don't know
            # when pixels first got painted, so we present frames to
            # the diagnostics as they become ready.)
            delta = min(delta, 0.016)
            pyprofile.timing('vnc_env.Throttle.sleep', delta)
            accum_info['stats.throttle.sleep'] += delta
            time.sleep(delta)

            # We want to merge in the latest reward/done/info so that our
            # agent has the most up-to-date info post-sleep, but also want
            # to avoid popping any rewards where done=True (since we'd
            # have to merge across episode boundaries).
            action_n = []
            for done, peek in zip(accum_done_n, peek_n):
                if done or peek:
                    # No popping of reward/done
                    action_n.append([spaces.PeekReward])
                else:
                    action_n.append([])
            observation_n, reward_n, done_n, info = self._substep(action_n)

            # Merge observation, rewards and metadata.
            # Text observation has order in which the messages are sent.
            rewarder.merge_n(
                accum_observation_n, accum_reward_n, accum_done_n, accum_info,
                observation_n, reward_n, done_n, info,
            )
    return accum_observation_n, accum_reward_n, accum_done_n, accum_info
def do_run(self):
    """Main rewarder loop.

    Each iteration: waits out any env reset, refreshes reward-parser
    state when the env or episode changes, computes the frame's reward,
    periodically attaches a pyprofile export, sends interesting updates
    to the agent, and sleeps to hold the configured frame rate.
    Runs forever; exits only via exception.
    """
    # For debug environments which set a server-enforced time limit
    frames = 0

    # Log all the rewards, but buffer those logs rather than
    # spewing out in realtime.
    reward_logger = remote.RewardLogger()

    last_export = time.time()
    # Just make sure last_export time is set inside of pyprofile
    pyprofile.export(log=False, reset=False)

    # For tracking the framerate
    target = time.time()
    self.__vnc_last_update = target  # real initial value

    while True:
        # Atomically recover details of the env state
        env_info = self.env_status.env_info()
        env_state = env_info['env_state']

        # Hang out until it's all done resetting. We don't need to
        # reset any of our internal state here, as that happens
        # below. (We're not actually guaranteed to actually see a
        # resetting state, if the reset is very fast.)
        while env_state == 'resetting':
            logger.info('[%s] Blocking until env finishes resetting', utils.thread_name())
            env_info = self.env_status.wait_for_env_state_change(env_state)
            logger.info('[%s] Unblocking since env reset finished', utils.thread_name())
            env_state = env_info['env_state']
            # Start our frame timing from here
            target = time.time()

        env_state = env_info['env_state']
        episode_id = env_info['episode_id']
        env_id = env_info['env_id']
        fps = env_info['fps']
        assert env_state == 'running', 'Env state: {}'.format(env_state)

        if fps is not None and fps != self.fps:
            # Fix: the original passed (fps, self.fps, utils.thread_name()),
            # mismatching the '[%s] ... %s -> %s' format string; the thread
            # name belongs first, and the transition reads old -> new.
            logger.info('[%s] Changing fps: %s -> %s', utils.thread_name(), self.fps, fps)
            self.fps = fps

        # Detect whether the environment has reset, and thus we
        # need to clear our internal state.
        if env_id != self.env_id:
            assert episode_id != self._episode_id, \
                'Episode ids: {}->{}'.format(episode_id, self._episode_id)
            logger.info('[%s] Changing reward_parsers: %s -> %s',
                        utils.thread_name(), self.env_id, env_id)
            # This is slow (since it has to load the scorer), so
            # we call it only in the rare case where the env ID
            # has changed.
            self.set_reward_parser(env_info)
            frames = 0
            reward_logger.reset()
        elif episode_id != self._episode_id and self.reward_parser is None:
            # Just set internal state. This is fast since it
            # doesn't actually have to load anything. Also only
            # relevant during development.
            self.set_reward_parser(env_info)
            frames = 0
            reward_logger.reset()
        elif episode_id != self._episode_id:
            # If the env state ID changed, then we need to reset
            # the reward parser.
            #
            # Not enough just to look whether the env_state is
            # resetting, since in theory we might never see the
            # resetting state for a very fast reset.
            logger.info('[%s] Clearing reward_parser state: env_id=%s episode_id=%s->%s, env_state=%s',
                        utils.thread_name(), env_id, self._episode_id, episode_id, env_state)
            self._episode_id = episode_id
            self.reward_parser.reset()
            frames = 0
            reward_logger.reset()

        # Recover the exact reward
        with pyprofile.push('rewarder.compute_reward'):
            reward, done, info = self.reward()
        # done=None means we're not sure if the game is over or not.
        done = bool(done)

        # Cut short the environment. Currently only used in debug
        # environment.
        if self.controlplane_spec is not None and \
           self.controlplane_spec.server_timestep_limit is not None and \
           frames >= self.controlplane_spec.server_timestep_limit:
            logger.info('[%s] Marking environment as done=True since server_timestep_limit of %d frames reached',
                        utils.thread_name(), frames)
            done = True

        # Add our own statistics
        if time.time() - last_export > 5:
            force_send = True
            last_export = time.time()
            profile = pyprofile.export()
            # Send the pyprofile to the agent. Info keys we set
            # will be available directly to the agent.
            info['rewarder.profile'] = profile
        else:
            force_send = False

        # Send if there's anything interesting to transmit
        if reward != 0 or done or force_send:
            if 'rewarder.profile' in info:
                # We already print the pyprofile (during the
                # export) so no need to repeat it here. It's
                # pretty big.
                display_info = info.copy()
                display_info['rewarder.profile'] = '<{} bytes>'.format(
                    len(str(display_info['rewarder.profile'])))
            else:
                display_info = info

            # Fix: the original recorded the untruncated `info`, leaving
            # display_info unused and spewing the full profile into the
            # reward log. The agent still receives the full info below.
            reward_logger.record(reward, done, display_info)
            self.agent_conn.send_env_reward(reward, done, info, episode_id=episode_id)

        old_target = target
        # Run at the appropriate frame rate
        target += 1. / self.fps

        # Do appropriate sleeping
        delta = target - time.time()
        if done:
            # game_autoresets means the game itself will do the
            # reset, so we don't have to do any work.
            logger.info('[%s] Resetting environment since done=%s',
                        utils.thread_name(), done)
            self.trigger_reset()
        elif delta > 0:
            pyprofile.timing('rewarder.sleep', delta)
            time.sleep(delta)
        else:
            pyprofile.timing('rewarder.sleep.missed', -delta)
            if delta < -0.1:
                logger.info('[%s] Rewarder fell behind by %ss from target; losing %s frames',
                            utils.thread_name(), -delta, int(-delta * self.fps))
            target = time.time()
        # Record the total time spent in this frame, starting from the top
        pyprofile.timing('rewarder.frame', time.time() - old_target)
        frames += 1
def _step(self, action_n):
    """Step the wrapped env, then idle until the next frame deadline.

    While idling, keeps calling _substep (with peek-only actions) so that
    lag diagnostics stay current, merging any rewards/metadata that
    arrive during the sleep into the accumulated return values.
    """
    if self._steps is None:
        self._start_timer()
    self._steps += 1

    accum_observation_n, accum_reward_n, accum_done_n, accum_info = self._substep(action_n)
    accum_info['throttle.action.available_at'] = time.time()

    # Record which indexes we were just peeking at, so when we
    # make the follow-up we'll be sure to peek there too.
    #
    # Fix: the original wrote `any(spaces.PeekReward for peek in action)`,
    # which yields the (always-truthy) PeekReward object once per event and
    # so was True for ANY non-empty action. Compare each event against the
    # sentinel instead.
    peek_n = [any(peek is spaces.PeekReward for peek in action) for action in action_n]

    if self.fps is None:
        return accum_observation_n, accum_reward_n, accum_done_n, accum_info

    accum_info['stats.throttle.sleep'] = 0
    while True:
        # See how much time we have to idle
        delta = self._start + 1. / self.fps * self._steps - time.time()
        if delta < 0:
            # We're out of time. Just get out of here.
            delta = abs(delta)
            if delta >= 1:
                logger.info('Throttle fell behind by %.2fs; lost %.2f frames',
                            delta, self.fps * delta)
            pyprofile.timing('vnc_env.Throttle.lost_sleep', delta)
            self._start_timer()
            break
        else:
            # NOTE(review): the original carried a disabled fast-path for
            # very short (<8ms) deltas that slept the remainder rather
            # than re-stepping; it stays omitted here.
            #
            # We've got plenty of time. Sleep for up to 16ms, and
            # then refresh our current frame. We need to constantly be
            # calling step so that our lags are reported correctly,
            # within 16ms. (The vncdriver doesn't know which pixels
            # correspond to metadata, and the diagnostics don't know
            # when pixels first got painted, so we present frames to
            # the diagnostics as they become ready.)
            delta = min(delta, 0.016)
            pyprofile.timing('vnc_env.Throttle.sleep', delta)
            accum_info['stats.throttle.sleep'] += delta
            time.sleep(delta)

            # We want to merge in the latest reward/done/info so that our
            # agent has the most up-to-date info post-sleep, but also want
            # to avoid popping any rewards where done=True (since we'd
            # have to merge across episode boundaries).
            action_n = []
            for done, peek in zip(accum_done_n, peek_n):
                if done or peek:
                    # No popping of reward/done
                    action_n.append([spaces.PeekReward])
                else:
                    action_n.append([])
            observation_n, reward_n, done_n, info = self._substep(action_n)

            # Merge observation, rewards and metadata.
            # Text observation has order in which the messages are sent.
            rewarder.merge_n(
                accum_observation_n, accum_reward_n, accum_done_n, accum_info,
                observation_n, reward_n, done_n, info,
            )
    return accum_observation_n, accum_reward_n, accum_done_n, accum_info