def _step(self, action_n):
    """Step the wrapped env, then let diagnostics annotate the infos.

    When ``self.unwrapped.diagnostics`` is truthy, its ``add_metadata`` is
    called with the observations and the per-index info list stored under
    ``info['n']``. The step results are returned unchanged otherwise.
    """
    step_result = self.env.step(action_n)
    observation_n, reward_n, done_n, info = step_result

    # We want this to be above Mask, so we know whether or not a
    # particular index is resetting.
    if not self.unwrapped.diagnostics:
        return observation_n, reward_n, done_n, info

    with pyprofile.push('vnc_env.diagnostics.add_metadata'):
        self.unwrapped.diagnostics.add_metadata(observation_n, info['n'])
    return observation_n, reward_n, done_n, info
def add_metadata(self, observation_n, info_n, available_at=None):
    """Mutates the info_n dictionary.

    Fans ``self._add_metadata_i`` out over ``self.pool``, one work item per
    ``(instance, observation, info, available_at)`` tuple, and waits for all
    of them to complete. Does nothing when ``self.instance_n`` is None.

    Args:
        observation_n: sequence of observations, zipped with the instances.
        info_n: sequence of per-index info objects, zipped with the instances.
        available_at: value forwarded unchanged to every work item.
    """
    if self.instance_n is None:
        return

    with pyprofile.push('vnc_env.diagnostics.Diagnostics.add_metadata'):
        work_items = zip(
            self.instance_n,
            observation_n,
            info_n,
            [available_at] * len(observation_n),
        )
        # NOTE: this local used to be called `async`, which is a reserved
        # keyword since Python 3.7 and made the module fail to compile.
        pending = self.pool.imap_unordered(self._add_metadata_i, work_items)
        # Consume the iterator so we do not return before every work item
        # has produced its result.
        list(pending)
def _step(self, action_n):
    """Step the wrapped env and report the step time excluding sleep.

    The whole step is profiled under 'vnc_env.Timer.step'. A second timing,
    'vnc_env.Timer.step.excluding_sleep', subtracts the value found under
    ``info['stats.throttle.sleep']`` (treated as 0 when missing or negative).
    """
    began = time.time()
    with pyprofile.push('vnc_env.Timer.step'):
        observation_n, reward_n, done_n, info = self.env.step(action_n)

    # Calculate how much time was spent actually doing work
    slept = info.get('stats.throttle.sleep')
    slept = 0 if slept is None or slept < 0 else slept
    elapsed = time.time() - began
    pyprofile.timing('vnc_env.Timer.step.excluding_sleep', elapsed - slept)
    return observation_n, reward_n, done_n, info
def flip(self):
    """Swap the front and back screens if the back screen has updates.

    Returns:
        A ``(screen, info)`` tuple where ``screen`` is ``self.peek()`` and
        ``info['vnc_session.framebuffer_updates']`` holds the deferred
        updates taken from ``self._defer`` (an empty list when nothing was
        pending).
    """
    pyprofile.push('vncdriver.numpy_screen.flip_bitmap')
    try:
        with self.lock:
            if self._back_updated:
                updates = self._defer

                # Flip screens
                front_screen, back_screen = self._screens
                self._screens = back_screen, front_screen

                # Mark ourselves as pending application of updates
                self._back_updated = False

                # This can be called asynchronously if desired, but it means
                # less reliably smooth playback.
                #
                # reactor.callFromThread(self.update_back)
                self.update_back()
            else:
                updates = []
            result = self.peek(), {'vnc_session.framebuffer_updates': updates}
    finally:
        # Always balance the push above, even if update_back()/peek() raise;
        # otherwise the profiling stack is left with a dangling entry.
        pyprofile.pop()
    return result
def _pop_rewarder_session(self, peek_d):
    """Pop the rewarder session and reorder its results by connection name.

    Returns four lists aligned with ``self.connection_names``: rewards
    (default 0), done flags (default False), infos (default a dict marking
    the connection as disconnected) and errors (default None).
    """
    with pyprofile.push('vnc_env.VNCEnv.rewarder_session.pop'):
        reward_d, done_d, info_d, err_d = self.rewarder_session.pop(
            peek_d=peek_d)

    names = self.connection_names
    reward_n = [reward_d.get(name, 0) for name in names]
    done_n = [done_d.get(name, False) for name in names]
    info_n = [
        info_d.get(name, {'env_status.disconnected': True}) for name in names
    ]
    err_n = [err_d.get(name) for name in names]
    return reward_n, done_n, info_n, err_n
def _substep(self, action_n):
    """Run one env step, timestamp the observation, and run diagnostics.

    The time at which the observation became available is stored under
    ``info['throttle.observation.available_at']``. Steps that take longer
    than one second are logged. Diagnostics run unless ``self.skip_metadata``
    is set or no diagnostics object is configured.
    """
    with pyprofile.push('vnc_env.Throttle.step'):
        began = time.time()
        # Submit the action ASAP, before the thread goes to sleep.
        observation_n, reward_n, done_n, info = self.env.step(action_n)

        available_at = time.time()
        info['throttle.observation.available_at'] = available_at

        if available_at - began > 1:
            logger.info('env.step took a long time: %.2fs',
                        available_at - began)

        if not (self.skip_metadata or self.diagnostics is None):
            # Run (slow) diagnostics
            self.diagnostics.add_metadata(
                observation_n, info['n'], available_at=available_at)

        return observation_n, reward_n, done_n, info
def _step_vnc_session(self, compiled_d):
    """Step the VNC session and reorder its results by connection name.

    When actions are sent over websockets, ``compiled_d`` is handed to the
    rewarder session instead and the VNC session is stepped with an empty
    action dict. Returns three lists aligned with ``self.connection_names``:
    observations, infos and errors (each None where the name is absent).
    """
    vnc_action_d = compiled_d
    if self._send_actions_over_websockets:
        self.rewarder_session.send_action(compiled_d, self.spec.id)
        vnc_action_d = {}

    with pyprofile.push('vnc_env.VNCEnv.vnc_session.step'):
        observation_d, info_d, err_d = self.vnc_session.step(vnc_action_d)

    names = self.connection_names
    observation_n = [observation_d.get(name) for name in names]
    info_n = [info_d.get(name) for name in names]
    err_n = [err_d.get(name) for name in names]
    return observation_n, info_n, err_n
for i in range(args.max_steps): # print(observation_n) # user_input.handle_events() if render: # Note the first time you call render, it'll be relatively # slow and you'll have some aggregated rewards. We could # open the render() window before `reset()`, but that's # confusing since it pops up a black window for the # duration of the reset. env.render() action_n = agent(observation_n, reward_n, done_n) # Take an action with pyprofile.push('env.step'): observation_n, reward_n, done_n, info = env.step(action_n) episode_length += 1 if not all(r is None for r in reward_n): # checks if we connected the rewarder episode_score += np.array(reward_n) for i, ob in enumerate(observation_n): if ob is not None and (not isinstance(ob, dict) or ob['vision'] is not None): observation_count[i] += 1 scores = {} lengths = {} observations = {} for i, done in enumerate(done_n):
def add_metadata(self, observation, info, available_at=None):
    """Extract metadata from a pixel observation and add it to the info.

    Decodes the metadata carried by ``observation['vision']`` and records
    clock-skew and lag gauges in ``info``, which is mutated in place.
    Returns None. Nothing is recorded when there is no vision observation,
    when a network is configured but not active, or when no metadata
    decoder is configured.

    Args:
        observation: mapping whose 'vision' entry is the pixel observation
            (may be None).
        info: dict that receives the diagnostics entries.
        available_at: forwarded to ``self.metadata_decoder.decode``.
    """
    observation = observation['vision']
    if observation is None:
        return
    if self.network is not None and not self.network.active():
        return
    if self.metadata_decoder is None:
        return

    # should return a dict with now/probe_received_at keys
    with pyprofile.push(
            'vnc_env.diagnostics.DiagnosticsInstance.add_metadata.decode'):
        metadata = self.metadata_decoder.decode(observation,
                                                available_at=available_at)

    if metadata is False:
        # No metadata ready, though it doesn't mean parsing failed
        metadata = None
    elif metadata is None:
        if self.could_read_metadata:
            self.could_read_metadata = False
            extra_logger.info(
                '[%s] Stopped being able to read metadata (expected when environment resets)',
                self.label)
    elif not self.could_read_metadata:
        self.could_read_metadata = True
        extra_logger.info('[%s] Started being able to read metadata',
                          self.label)

    if self.metadata_decoder.flag_synchronous and metadata is not None:
        info['diagnostics.image_remote_time'] = metadata['now']

    if self.network is None:
        # Assume the clock skew is zero. Should only be run on the
        # same machine as the VNC server, such as the jiminy
        # instance inside of the environment containers.
        real_clock_skew = self.zero_clock_skew
    else:
        # Note: this is a 2-length vector of (min, max), so anything added to
        # it is also going to be a 2-length vector.
        # Most of the diagnostics below are, but you have to look carefully.
        real_clock_skew = self.network.reversed_clock_skew()

    # Store real clock skew here
    info['stats.gauges.diagnostics.clock_skew'] = real_clock_skew
    if self.ignore_clock_skew:
        clock_skew = self.zero_clock_skew
    else:
        clock_skew = real_clock_skew

    if metadata is not None:
        # We'll generally update the observation timestamp infrequently
        if self.last_observation_timestamp == metadata['now']:
            delta = None
        else:
            # We just got a new timestamp in the observation!
            self.last_observation_timestamp = metadata['now']
            observation_now = metadata['now']
            delta = observation_now - metadata['available_at']

            # Subtract *local* time it was received from the *remote* time
            # displayed. Negate and reverse order to fix time ordering.
            info['stats.gauges.diagnostics.lag.observation'] = -(
                delta + clock_skew)[[1, 0]]

        # if self.network is None:
        #     # The rest of diagnostics need the network, so we're done here
        #     return

        probe_received_at = metadata['probe_received_at']
        if probe_received_at == 0 or self.disable_action_probes:
            # Happens when the env first starts
            self.probe_received_at = None
        elif self.probe_received_at is None:
            # this also would work for the equality case
            self.probe_received_at = probe_received_at
        elif (self.probe_received_at != probe_received_at
              and self.probe_sent_at is None):
            logger.info(
                '[%s] Probe is marked as received at %s, but probe_sent_at is None. This is surprising. (HINT: do you have multiple jiminy instances talking to the same environment?)',
                self.label, probe_received_at)
        elif self.probe_received_at != probe_received_at:
            extra_logger.debug('[%s] Next probe received: old=%s new=%s',
                               self.label, self.probe_received_at,
                               probe_received_at)
            self.probe_received_at = probe_received_at
            # Subtract the *local* time we sent it from the *remote* time
            # it was received
            self.action_latency_skewed = probe_received_at - self.probe_sent_at
            self.probe_sent_at = None

        if self.action_latency_skewed:
            action_lag = self.action_latency_skewed + clock_skew
            self.action_latency_skewed = None
        else:
            action_lag = None
        info['stats.gauges.diagnostics.lag.action'] = action_lag

    local_now = time.time()

    # Look at when the remote believed it parsed the score (not
    # all envs send this currently).
    #
    # Also, if we received no new rewards, then this value is
    # None. This could indicate a high reward latency (bad,
    # uncommon), or that the agent is calling step faster than new
    # rewards are coming in (good, common).
    remote_score_now = info.get('rewarder.lag.observation.timestamp')
    if remote_score_now is not None:
        delta = remote_score_now - local_now
        info['stats.gauges.diagnostics.lag.reward'] = -(
            delta + clock_skew)[[1, 0]]

    # Look at when the remote sent the message, so we know how
    # long it's taking for messages to get to us.
    rewarder_message_now = info.get('reward_buffer.remote_time')
    if rewarder_message_now:
        delta = rewarder_message_now - local_now
        info['stats.gauges.diagnostics.lag.rewarder_message'] = -(
            delta + clock_skew)[[1, 0]]
def write_item(self, item):
    """Append ``item`` to the log file as a single JSON line and flush.

    Keys that JSON cannot represent are skipped, and values it cannot
    encode natively are passed through ``self.json_encode``.
    """
    with pyprofile.push('recording.write'):
        serialized = json.dumps(
            item,
            skipkeys=True,
            default=self.json_encode,
        )
        log_file = self.log_f
        log_file.write(serialized + '\n')
        log_file.flush()
def _reset(self):
    """Reset the wrapped env, profiled under 'vnc_env.Timer.reset'."""
    with pyprofile.push('vnc_env.Timer.reset'):
        reset_result = self.env.reset()
    return reset_result