def apply(self, framebuffer_update):
    """Apply every rectangle of *framebuffer_update* to the screen.

    Raises:
        error.Error: if a rectangle uses an encoding other than RAW,
            ZRLE, or Zlib.
    """
    # Use the context-manager form so the profile frame is popped even when
    # update_rectangle or the unsupported-encoding branch raises. The bare
    # push()/pop() pair leaked a frame on error; the rest of this file
    # already uses `with pyprofile.push(...)`.
    with pyprofile.push('vncdriver.pyglet_screen.apply'):
        for rect in framebuffer_update.rectangles:
            if isinstance(rect.encoding, (server_messages.RAWEncoding,
                                          server_messages.ZRLEEncoding,
                                          server_messages.ZlibEncoding)):
                self.update_rectangle(rect.x, rect.y, rect.width, rect.height, rect.encoding.data)
            else:
                raise error.Error('Unrecognized encoding: {}'.format(rect.encoding))
def __call__(self, img):
    """Return the cached value for the cropped region of *img*.

    On a cache miss, compute the value with self.method, store it, and
    return it (read-through caching).
    """
    region = crop(img, self.crop_coords)
    with pyprofile.push('score.crop_cache.get.{}'.format(self.name)):
        cached = self.get(region)
    if cached is not None:
        return cached
    # Cache miss: compute and store before returning.
    with pyprofile.push('score.crop_cache.readthrough.{}'.format(self.name)):
        computed = self.method(region)
    self.put(region, computed)
    return computed
def apply(self, framebuffer_update):
    """Apply every rectangle of *framebuffer_update* to the screen.

    Raises:
        error.Error: if a rectangle uses an encoding other than RAW,
            ZRLE, or Zlib.
    """
    # Context-manager form guarantees the profile frame is popped even if a
    # rectangle raises; the bare push()/pop() pair leaked a frame on error.
    with pyprofile.push('vncdriver.pyglet_screen.apply'):
        for rect in framebuffer_update.rectangles:
            if isinstance(
                    rect.encoding,
                    (server_messages.RAWEncoding, server_messages.ZRLEEncoding,
                     server_messages.ZlibEncoding)):
                self.update_rectangle(rect.x, rect.y, rect.width, rect.height, rect.encoding.data)
            else:
                raise error.Error('Unrecognized encoding: {}'.format(rect.encoding))
def _step(self, action_n):
    """Step the wrapped env and maintain a rolling channel-stacked buffer.

    Each per-index buffer is shifted left by one frame's worth of channels
    and the newest observation is written into the tail slot. Returns
    copies of the buffers so callers cannot mutate internal state.

    NOTE(review): the with-block nesting below is reconstructed from a
    whitespace-mangled source — confirm the intended profiling scopes.
    """
    with pyprofile.push('env.obs_buffer.step'):
        with pyprofile.push('env.obs_buffer.after_step'):
            observation_n, reward_n, done_n, info = self.env.step(action_n)
        num_c = self._num_channels
        for i in range(self.n):
            buffer = self.buffer_n[i]
            # Shift existing frames toward the front of the channel axis.
            buffer[..., 0:num_c * (self.buffer_size - 1)] = buffer[..., num_c:num_c * self.buffer_size]
            # BUG FIX: the original wrote observation_n[0] into every buffer,
            # so all indexes > 0 received index 0's observation.
            buffer[..., -num_c:] = observation_n[i]
        return [buffer.copy() for buffer in self.buffer_n], reward_n, done_n, info
def _decode(self, observation, start, available_at):
    """Scan a QR code out of *observation* and decode its timing metadata.

    This method gets wrapped by AsyncDecode.__call__.

    Returns:
        None when no QR code could be parsed, otherwise a dict with
        'now', 'probe_received_at', 'processing_start', 'processing_end',
        and 'available_at' timestamps.

    Raises:
        error.Error: on a malformed or unrecognized payload.
    """
    with pyprofile.push('vnc_env.diagnostics.QRCodeMetadataDecoder.qr_code_scanner'):
        encoded = fastzbarlight.qr_code_scanner(observation.tobytes(), self.width, self.height)
    if encoded is None:
        # Failed to parse!
        return
    if encoded.startswith(b'v1:'):
        encoded = encoded.decode('utf-8')
        if len(encoded) != len('v1:') + 12 + 12:
            # Typo fix: the original message read 'enviroment'.
            raise error.Error('Bad length for metadata from environment: {}'.format(encoded))
        encoded = encoded[len('v1:'):]
        # Two 12-hex-digit millisecond timestamps, converted to seconds.
        last_update = int(encoded[:12], 16) / 1000.0
        last_action = int(encoded[12:24], 16) / 1000.0
        return {
            # Timestamp on the image
            'now': last_update,
            # When the last probe was received
            'probe_received_at': last_action,
            'processing_start': start,
            'processing_end': time.time(),
            'available_at': available_at,
        }
    else:
        raise error.Error('Bad version string for metadata from environment: {}'.format(encoded))
def _decode(self, observation, start, available_at):
    """Scan a QR code out of *observation* and decode its timing metadata.

    This method gets wrapped by AsyncDecode.__call__.

    Returns:
        None when no QR code could be parsed, otherwise a dict of
        timing metadata.

    Raises:
        error.Error: on a malformed or unrecognized payload.
    """
    with pyprofile.push(
            'vnc_env.diagnostics.QRCodeMetadataDecoder.qr_code_scanner'):
        encoded = fastzbarlight.qr_code_scanner(observation.tobytes(), self.width, self.height)
    if encoded is None:
        # Failed to parse!
        return
    if encoded.startswith(b'v1:'):
        encoded = encoded.decode('utf-8')
        if len(encoded) != len('v1:') + 12 + 12:
            # Typo fix: the original message read 'enviroment'.
            raise error.Error(
                'Bad length for metadata from environment: {}'.format(encoded))
        encoded = encoded[len('v1:'):]
        # Two 12-hex-digit millisecond timestamps, converted to seconds.
        last_update = int(encoded[:12], 16) / 1000.0
        last_action = int(encoded[12:24], 16) / 1000.0
        return {
            # Timestamp on the image
            'now': last_update,
            # When the last probe was received
            'probe_received_at': last_action,
            'processing_start': start,
            'processing_end': time.time(),
            'available_at': available_at,
        }
    else:
        raise error.Error(
            'Bad version string for metadata from environment: {}'.format(encoded))
def _step(self, action_n):
    """Step the wrapped env, then stamp diagnostics metadata into info['n'].

    Sits above Mask in the wrapper stack so resetting indexes are still
    visible when metadata is added.
    """
    observation_n, reward_n, done_n, info = self.env.step(action_n)
    diagnostics = self.unwrapped.diagnostics
    if diagnostics:
        with pyprofile.push('vnc_env.diagnostics.add_metadata'):
            diagnostics.add_metadata(observation_n, info['n'])
    return observation_n, reward_n, done_n, info
def _step(self, action_n):
    """Step the wrapped env and annotate per-index info with diagnostics.

    Runs above Mask so we can still tell which indexes are resetting.
    """
    observation_n, reward_n, done_n, info = self.env.step(action_n)
    if self.diagnostics:
        with pyprofile.push('vnc_env.diagnostics.add_metadata'):
            self.diagnostics.add_metadata(observation_n, info['n'])
    return observation_n, reward_n, done_n, info
def add_metadata(self, observation_n, info_n, available_at=None):
    """Mutates the info_n dictionary.

    Fans each (instance, observation, info) triple out to the worker pool
    and blocks until all of them have been processed. No-op when no
    instances are configured.
    """
    if self.instance_n is None:
        return
    with pyprofile.push('vnc_env.diagnostics.Diagnostics.add_metadata'):
        # BUG FIX: the local was named `async`, which became a reserved
        # keyword in Python 3.7 and made this a SyntaxError.
        results = self.pool.imap_unordered(
            self._add_metadata_i,
            zip(self.instance_n, observation_n, info_n,
                [available_at] * len(observation_n)))
        # Drain the lazy iterator so every worker finishes before we return.
        list(results)
def add_metadata(self, observation_n, info_n, available_at=None):
    """Mutates the info_n dictionary.

    Dispatches metadata extraction for every instance to the worker pool
    and waits for completion. Returns immediately when instance_n is None.
    """
    if self.instance_n is None:
        return
    with pyprofile.push('vnc_env.diagnostics.Diagnostics.add_metadata'):
        # BUG FIX: renamed the local from `async` — a reserved keyword
        # (and SyntaxError) as of Python 3.7.
        pending = self.pool.imap_unordered(
            self._add_metadata_i,
            zip(self.instance_n, observation_n, info_n,
                [available_at] * len(observation_n)))
        # Force evaluation so all workers complete before returning.
        list(pending)
def _step(self, action_n):
    """Step the wrapped env while timing the step under the profiler.

    Deliberate throttle sleep reported by the env is subtracted before
    recording the 'excluding_sleep' timing.
    """
    started = time.time()
    with pyprofile.push('vnc_env.Timer.step'):
        observation_n, reward_n, done_n, info = self.env.step(action_n)
    # Calculate how much time was spent actually doing work.
    slept = info.get('stats.throttle.sleep')
    if slept is None or slept < 0:
        slept = 0
    working_time = time.time() - started - slept
    pyprofile.timing('vnc_env.Timer.step.excluding_sleep', working_time)
    return observation_n, reward_n, done_n, info
def _step(self, action_n):
    """Timed step: records total and work-only (sleep-excluded) durations."""
    begin = time.time()
    with pyprofile.push('vnc_env.Timer.step'):
        result = self.env.step(action_n)
    observation_n, reward_n, done_n, info = result
    # Subtract any throttle sleep so the timing reflects real work.
    sleep = info.get('stats.throttle.sleep')
    sleep = 0 if sleep is None or sleep < 0 else sleep
    pyprofile.timing('vnc_env.Timer.step.excluding_sleep', time.time() - begin - sleep)
    return observation_n, reward_n, done_n, info
def _substep(self, action_n):
    """Run one un-throttled step, stamping when the observation became available."""
    with pyprofile.push('vnc_env.Throttle.step'):
        begin = time.time()
        # Submit the action ASAP, before the thread goes to sleep.
        observation_n, reward_n, done_n, info = self.env.step(action_n)
        available_at = info['throttle.observation.available_at'] = time.time()
        elapsed = available_at - begin
        if elapsed > 1:
            logger.info('env.step took a long time: %.2fs', elapsed)
        if not self.skip_metadata and self.diagnostics is not None:
            # Run (slow) diagnostics
            self.diagnostics.add_metadata(observation_n, info['n'], available_at=available_at)
        return observation_n, reward_n, done_n, info
def _pop_rewarder_session(self, peek_d):
    """Drain the rewarder session and order its results by connection name.

    Missing connections default to reward 0, done False, a disconnected
    info marker, and a None error.
    """
    with pyprofile.push('vnc_env.VNCEnv.rewarder_session.pop'):
        reward_d, done_d, info_d, err_d = self.rewarder_session.pop(peek_d=peek_d)
    names = self.connection_names
    reward_n = [reward_d.get(name, 0) for name in names]
    done_n = [done_d.get(name, False) for name in names]
    info_n = [info_d.get(name, {'env_status.disconnected': True}) for name in names]
    err_n = [err_d.get(name) for name in names]
    return reward_n, done_n, info_n, err_n
def flip(self):
    """Swap the front and back screens and return the new front contents.

    Returns:
        A (pixels, info) tuple where info carries the framebuffer updates
        deferred since the last flip (empty when the back screen had not
        been refreshed yet).
    """
    # Context-manager form guarantees the profile frame is popped even if
    # update_back()/peek() raises; the bare push()/pop() pair did not.
    with pyprofile.push('vncdriver.numpy_screen.flip_bitmap'):
        with self.lock:
            if self._back_updated:
                updates = self._defer
                # Flip screens
                front_screen, back_screen = self._screens
                self._screens = back_screen, front_screen
                # Mark ourselves as pending application of updates
                self._back_updated = False
                # This can be called asynchronously if desired, but it means
                # less reliably smooth playback.
                #
                # reactor.callFromThread(self.update_back)
                self.update_back()
            else:
                updates = []
            result = self.peek(), {'vnc_session.framebuffer_updates': updates}
        return result
def _pop_rewarder_session(self, peek_d):
    """Pop rewarder results and re-key them into per-connection lists.

    Connections with no entry get defaults: 0 reward, not done, a
    disconnected info dict, and no error.
    """
    with pyprofile.push('vnc_env.VNCEnv.rewarder_session.pop'):
        reward_d, done_d, info_d, err_d = self.rewarder_session.pop(peek_d=peek_d)
    reward_n, done_n, info_n, err_n = [], [], [], []
    for name in self.connection_names:
        reward_n.append(reward_d.get(name, 0))
        done_n.append(done_d.get(name, False))
        info_n.append(info_d.get(name, {'env_status.disconnected': True}))
        err_n.append(err_d.get(name))
    return reward_n, done_n, info_n, err_n
def _observation(self, observation_n):
    """Crop every observation to the configured window.

    When self.normalize is set, pixels are also converted to float32 and
    scaled into [0, 1]. None observations pass through unchanged.
    """
    with pyprofile.push('env.crop_screen.observation'):
        rows = slice(self.top, self.top + self.height)
        cols = slice(self.left, self.left + self.width)
        if self.normalize:
            return [
                ob[rows, cols, :].astype(np.float32) * (1.0 / 255.0)
                if ob is not None else None
                for ob in observation_n
            ]
        return [
            ob[rows, cols, :] if ob is not None else None
            for ob in observation_n
        ]
def _substep(self, action_n):
    """One raw (un-throttled) step; records observation availability time."""
    with pyprofile.push('vnc_env.Throttle.step'):
        submitted = time.time()
        # Submit the action ASAP, before the thread goes to sleep.
        observation_n, reward_n, done_n, info = self.env.step(action_n)
        available_at = info['throttle.observation.available_at'] = time.time()
        if available_at - submitted > 1:
            logger.info('env.step took a long time: %.2fs', available_at - submitted)
        run_diagnostics = not self.skip_metadata and self.diagnostics is not None
        if run_diagnostics:
            # Run (slow) diagnostics
            self.diagnostics.add_metadata(observation_n, info['n'], available_at=available_at)
        return observation_n, reward_n, done_n, info
def _step_vnc_session(self, compiled_d):
    """Submit compiled actions and gather per-connection VNC results.

    When actions ride the rewarder websocket, the VNC session receives an
    empty action dict.
    """
    if self._send_actions_over_websockets:
        self.rewarder_session.send_action(compiled_d, self.spec.id)
        vnc_action_d = {}
    else:
        vnc_action_d = compiled_d
    with pyprofile.push('vnc_env.VNCEnv.vnc_session.step'):
        observation_d, info_d, err_d = self.vnc_session.step(vnc_action_d)
    names = self.connection_names
    observation_n = [observation_d.get(name) for name in names]
    info_n = [info_d.get(name) for name in names]
    err_n = [err_d.get(name) for name in names]
    return observation_n, info_n, err_n
def _step_vnc_session(self, compiled_d):
    """Step the VNC session, re-keying results into per-connection lists."""
    # Actions either ride the rewarder websocket or go straight to VNC.
    vnc_action_d = compiled_d
    if self._send_actions_over_websockets:
        self.rewarder_session.send_action(compiled_d, self.spec.id)
        vnc_action_d = {}
    with pyprofile.push('vnc_env.VNCEnv.vnc_session.step'):
        observation_d, info_d, err_d = self.vnc_session.step(vnc_action_d)
    observation_n, info_n, err_n = [], [], []
    for name in self.connection_names:
        observation_n.append(observation_d.get(name))
        info_n.append(info_d.get(name))
        err_n.append(err_d.get(name))
    return observation_n, info_n, err_n
def _gameover(self, img):
    """Check *img* for a game-over screen via vexpect.

    Returns (None, None) when no vexpect matcher is configured.
    """
    vexpect = self.vexpect
    if vexpect is not None:
        with pyprofile.push('reward.parsing.gameover'):
            return vexpect.gameover(img)
    return None, None
# NOTE(review): this is a whitespace-mangled excerpt of a script's agent
# loop; the trailing `for` statement is truncated mid-block in the source
# and is reproduced as-is.
for i in range(args.max_steps):
    # print(observation_n)
    # user_input.handle_events()
    if render:
        # Note the first time you call render, it'll be relatively
        # slow and you'll have some aggregated rewards. We could
        # open the render() window before `reset()`, but that's
        # confusing since it pops up a black window for the
        # duration of the reset.
        env.render()
    action_n = agent(observation_n, reward_n, done_n)
    # Take an action
    with pyprofile.push('env.step'):
        observation_n, reward_n, done_n, info = env.step(action_n)
    episode_length += 1
    if not all(r is None for r in reward_n):
        # checks if we connected the rewarder
        episode_score += np.array(reward_n)
    # NOTE(review): this inner loop reuses `i`, shadowing the outer step
    # counter — confirm this is intentional.
    for i, ob in enumerate(observation_n):
        if ob is not None and (not isinstance(ob, dict) or ob['vision'] is not None):
            observation_count[i] += 1
    # Per-episode bookkeeping, keyed by index.
    scores = {}
    lengths = {}
    observations = {}
    for i, done in enumerate(done_n):
def _observation(self, observation_n):
    """Apply Flash frame rescaling to each observation in the batch."""
    with pyprofile.push('env.flash_rescale.observation'):
        return list(map(_process_frame_flash, observation_n))
def main():
    """Entry point: parse CLI args, build/configure the env, run the driver loop."""
    # You can optionally set up the logger. Also fine to set the level
    # to logging.DEBUG or logging.WARN if you want to change the
    # amount of output.
    logger.setLevel(logging.INFO)
    universe.configure_logging()

    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-e', '--env_id', default='gtav.SaneDriving-v0', help='Which environment to run on.')
    # NOTE(review): action='store_false' means passing -m *disables* the
    # monitor (default True), which contradicts the help text — confirm intent.
    parser.add_argument('-m', '--monitor', action='store_false', help='Whether to activate the monitor.')
    parser.add_argument('-r', '--remote', help='The number of environments to create (e.g. -r 20), or the address of pre-existing VNC servers and rewarders to use (e.g. -r vnc://localhost:5900+15900,localhost:5901+15901), or a query to the allocator (e.g. -r http://allocator.sci.openai-tech.com?n=2)')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    parser.add_argument('-R', '--no-render', action='store_true', help='Do not render the environment locally.')
    parser.add_argument('-f', '--fps', default=8., type=float, help='Desired frames per second')
    parser.add_argument('-N', '--max-steps', type=int, default=10**7, help='Maximum number of steps to take')
    parser.add_argument('-d', '--driver', default='DeepDriver', help='Choose your driver')
    parser.add_argument('-c', '--custom_camera', action='store_true', help='Customize the GTA camera')
    args = parser.parse_args()

    logging.getLogger('gym').setLevel(logging.NOTSET)
    logging.getLogger('universe').setLevel(logging.NOTSET)
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    if args.env_id is not None:
        # N.B. This does not set the actual environment mode yet, which
        # is currently driven by environment itself.
        env = gym.make(args.env_id)
    else:
        env = wrappers.WrappedVNCEnv()
    if not isinstance(env, wrappers.GymCoreAction):
        # The GymCoreSyncEnv's try to mimic their core counterparts,
        # and thus came pre-wrapped with an action space
        # translator. Everything else probably wants a SafeActionSpace
        # wrapper to shield them from random-agent clicking around
        # everywhere.
        env = wrappers.SafeActionSpace(env)
    else:
        # Only gym-core are seedable
        env.seed([0])
    env = wrappers.Logger(env)

    env.configure(
        fps=args.fps,
        # print_frequency=None,
        # ignore_clock_skew=True,
        remotes=args.remote,
        vnc_driver='go',
        vnc_kwargs={
            'encoding': 'tight',
            'compress_level': 0,
            'fine_quality_level': 50,
            'subsample_level': 0,
            'quality_level': 5,
        },
    )

    if args.driver == 'DeepDriver':
        driver = DeepDriver()
    elif args.driver == 'DeepDriverTF':
        driver = DeepDriverTF()
    else:
        raise Exception('That driver is not available')

    driver.setup()

    if args.monitor:
        # env.monitor.start('/tmp/vnc_random_agent', force=True, video_callable=lambda i: True)
        # NOTE(review): the Monitor wrapper's return value is discarded, so this
        # call has no effect on `env` — confirm whether it should be
        # `env = wrappers.Monitor(...)`.
        wrappers.Monitor(env, '/tmp/vnc_random_agent', video_callable=False, force=True)

    render = not args.no_render
    observation_n = env.reset()
    reward_n = [0] * env.n
    done_n = [False] * env.n
    info = None

    for i in range(args.max_steps):
        if render:
            # Note the first time you call render, it'll be relatively
            # slow and you'll have some aggregated rewards. We could
            # open the render() window before `reset()`, but that's
            # confusing since it pops up a black window for the
            # duration of the reset.
            env.render()

        action_n = driver.step(observation_n, reward_n, done_n, info)

        try:
            if info is not None:
                distance = info['n'][0]['distance_from_destination']
                logger.info('distance %s', distance)
        except KeyError as e:
            logger.debug('distance not available %s', str(e))

        if args.custom_camera:
            # Sending this every step is probably overkill
            for action in action_n:
                action.append(GTASetting('use_custom_camera', True))

        # Take an action
        with pyprofile.push('env.step'):
            _step = env.step(action_n)
            observation_n, reward_n, done_n, info = _step

        if any(done_n) and info and not any(info_n.get('env_status.artificial.done', False) for info_n in info['n']):
            print('done_n', done_n, 'i', i)
            logger.info('end of episode')
            env.reset()

    # We're done! clean up
    env.close()
# NOTE(review): whitespace-mangled excerpt of a script's agent loop; the
# final `for` block's body continues past this fragment in the original.
for i in range(args.max_steps):
    # print(observation_n)
    # user_input.handle_events()
    if render:
        # Note the first time you call render, it'll be relatively
        # slow and you'll have some aggregated rewards. We could
        # open the render() window before `reset()`, but that's
        # confusing since it pops up a black window for the
        # duration of the reset.
        env.render()
    action_n = agent(observation_n, reward_n, done_n)
    # Take an action
    with pyprofile.push('env.step'):
        observation_n, reward_n, done_n, info = env.step(action_n)
    episode_length += 1
    if not all(r is None for r in reward_n):
        # checks if we connected the rewarder
        episode_score += np.array(reward_n)
    # NOTE(review): this inner loop reuses `i`, shadowing the outer step
    # counter — confirm this is intentional.
    for i, ob in enumerate(observation_n):
        if ob is not None and (not isinstance(ob, dict) or ob['vision'] is not None):
            observation_count[i] += 1
    # Per-episode bookkeeping, keyed by index.
    scores = {}
    lengths = {}
    observations = {}
    for i, done in enumerate(done_n):
        # Only indexes whose episode just finished are processed below.
        if not done:
            continue
def add_metadata(self, observation, info, available_at=None):
    """Extract metadata from a pixel observation and add it to the info

    Decodes the QR-code timing metadata embedded in the frame, tracks
    probe round-trips, and writes lag/clock-skew gauges into *info*.
    Mutates *info* in place; returns None.
    """
    observation = observation['vision']
    if observation is None:
        return
    if self.network is not None and not self.network.active():
        return
    elif self.metadata_decoder is None:
        return
    elif observation is None:
        return

    # should return a dict with now/probe_received_at keys
    with pyprofile.push('vnc_env.diagnostics.DiagnosticsInstance.add_metadata.decode'):
        metadata = self.metadata_decoder.decode(observation, available_at=available_at)

    if metadata is False:
        # No metadata ready, though it doesn't mean parsing failed
        metadata = None
    elif metadata is None:
        # Parsing failed: log the transition (edge-triggered) only once.
        if self.could_read_metadata:
            self.could_read_metadata = False
            extra_logger.info('[%s] Stopped being able to read metadata (expected when environment resets)', self.label)
    elif not self.could_read_metadata:
        self.could_read_metadata = True
        extra_logger.info('[%s] Started being able to read metadata', self.label)

    if self.metadata_decoder.flag_synchronous and metadata is not None:
        info['diagnostics.image_remote_time'] = metadata['now']

    local_now = time.time()

    if self.network is None:
        # Assume the clock skew is zero. Should only be run on the
        # same machine as the VNC server, such as the universe
        # instance inside of the environmenth containers.
        real_clock_skew = self.zero_clock_skew
    else:
        # Note: this is a 2-length vector of (min, max), so anything added to
        # it is also going to be a 2-length vector.
        # Most of the diagnostics below are, but you have to look carefully.
        real_clock_skew = self.network.reversed_clock_skew()

    # Store real clock skew here
    info['stats.gauges.diagnostics.clock_skew'] = real_clock_skew
    if self.ignore_clock_skew:
        clock_skew = self.zero_clock_skew
    else:
        clock_skew = real_clock_skew

    if metadata is not None:
        # We'll generally update the observation timestamp infrequently
        if self.last_observation_timestamp == metadata['now']:
            delta = None
        else:
            # We just got a new timestamp in the observation!
            self.last_observation_timestamp = metadata['now']
            observation_now = metadata['now']
            delta = observation_now - metadata['available_at']
            # Subtract *local* time it was received from the *remote* time
            # displayed. Negate and reverse order to fix time ordering.
            info['stats.gauges.diagnostics.lag.observation'] = -(delta + clock_skew)[[1, 0]]

        # if self.network is None:
        #     # The rest of diagnostics need the network, so we're done here
        #     return

        probe_received_at = metadata['probe_received_at']
        if probe_received_at == 0 or self.disable_action_probes:
            # Happens when the env first starts
            self.probe_received_at = None
        elif self.probe_received_at is None:
            # this also would work for the equality case
            self.probe_received_at = probe_received_at
        elif self.probe_received_at != probe_received_at and self.probe_sent_at is None:
            logger.info('[%s] Probe is marked as received at %s, but probe_sent_at is None. This is surprising. (HINT: do you have multiple universe instances talking to the same environment?)', self.label, probe_received_at)
        elif self.probe_received_at != probe_received_at:
            extra_logger.debug('[%s] Next probe received: old=%s new=%s', self.label, self.probe_received_at, probe_received_at)
            self.probe_received_at = probe_received_at
            # Subtract the *local* time we sent it from the *remote* time it was received
            self.action_latency_skewed = probe_received_at - self.probe_sent_at
            self.probe_sent_at = None

        if self.action_latency_skewed:
            action_lag = self.action_latency_skewed + clock_skew
            self.action_latency_skewed = None
        else:
            action_lag = None
        info['stats.gauges.diagnostics.lag.action'] = action_lag

    local_now = time.time()

    # Look at when the remote believed it parsed the score (not
    # all envs send this currently).
    #
    # Also, if we received no new rewards, then this values is
    # None. This could indicate a high reward latency (bad,
    # uncommon), or that the agent is calling step faster than new
    # rewards are coming in (good, common).
    remote_score_now = info.get('rewarder.lag.observation.timestamp')
    if remote_score_now is not None:
        delta = remote_score_now - local_now
        info['stats.gauges.diagnostics.lag.reward'] = -(delta + clock_skew)[[1, 0]]

    # Look at when the remote send the message, so we know how
    # long it's taking for messages to get to us.
    rewarder_message_now = info.get('reward_buffer.remote_time')
    if rewarder_message_now:
        delta = rewarder_message_now - local_now
        info['stats.gauges.diagnostics.lag.rewarder_message'] = -(delta + clock_skew)[[1, 0]]
def add_metadata(self, observation, info, available_at=None):
    """Extract metadata from a pixel observation and add it to the info

    Decodes QR-code timing metadata from the frame, tracks probe
    round-trips, and writes lag/clock-skew gauges into *info* in place.
    """
    observation = observation['vision']
    if observation is None:
        return
    if self.network is not None and not self.network.active():
        return
    elif self.metadata_decoder is None:
        return
    elif observation is None:
        return

    # should return a dict with now/probe_received_at keys
    with pyprofile.push(
            'vnc_env.diagnostics.DiagnosticsInstance.add_metadata.decode'):
        metadata = self.metadata_decoder.decode(observation, available_at=available_at)

    if metadata is False:
        # No metadata ready, though it doesn't mean parsing failed
        metadata = None
    elif metadata is None:
        # Parsing failed: edge-triggered logging of the transition.
        if self.could_read_metadata:
            self.could_read_metadata = False
            extra_logger.info(
                '[%s] Stopped being able to read metadata (expected when environment resets)',
                self.label)
    elif not self.could_read_metadata:
        self.could_read_metadata = True
        extra_logger.info('[%s] Started being able to read metadata', self.label)

    if self.metadata_decoder.flag_synchronous and metadata is not None:
        info['diagnostics.image_remote_time'] = metadata['now']

    local_now = time.time()

    if self.network is None:
        # Assume the clock skew is zero. Should only be run on the
        # same machine as the VNC server, such as the universe
        # instance inside of the environmenth containers.
        real_clock_skew = self.zero_clock_skew
    else:
        # Note: this is a 2-length vector of (min, max), so anything added to
        # it is also going to be a 2-length vector.
        # Most of the diagnostics below are, but you have to look carefully.
        real_clock_skew = self.network.reversed_clock_skew()

    # Store real clock skew here
    info['stats.gauges.diagnostics.clock_skew'] = real_clock_skew
    if self.ignore_clock_skew:
        clock_skew = self.zero_clock_skew
    else:
        clock_skew = real_clock_skew

    if metadata is not None:
        # We'll generally update the observation timestamp infrequently
        if self.last_observation_timestamp == metadata['now']:
            delta = None
        else:
            # We just got a new timestamp in the observation!
            self.last_observation_timestamp = metadata['now']
            observation_now = metadata['now']
            delta = observation_now - metadata['available_at']
            # Subtract *local* time it was received from the *remote* time
            # displayed. Negate and reverse order to fix time ordering.
            info['stats.gauges.diagnostics.lag.observation'] = -(
                delta + clock_skew)[[1, 0]]

        # if self.network is None:
        #     # The rest of diagnostics need the network, so we're done here
        #     return

        probe_received_at = metadata['probe_received_at']
        if probe_received_at == 0 or self.disable_action_probes:
            # Happens when the env first starts
            self.probe_received_at = None
        elif self.probe_received_at is None:
            # this also would work for the equality case
            self.probe_received_at = probe_received_at
        elif self.probe_received_at != probe_received_at and self.probe_sent_at is None:
            logger.info(
                '[%s] Probe is marked as received at %s, but probe_sent_at is None. This is surprising. (HINT: do you have multiple universe instances talking to the same environment?)',
                self.label, probe_received_at)
        elif self.probe_received_at != probe_received_at:
            extra_logger.debug('[%s] Next probe received: old=%s new=%s',
                               self.label, self.probe_received_at, probe_received_at)
            self.probe_received_at = probe_received_at
            # Subtract the *local* time we sent it from the *remote* time it was received
            self.action_latency_skewed = probe_received_at - self.probe_sent_at
            self.probe_sent_at = None

        if self.action_latency_skewed:
            action_lag = self.action_latency_skewed + clock_skew
            self.action_latency_skewed = None
        else:
            action_lag = None
        info['stats.gauges.diagnostics.lag.action'] = action_lag

    local_now = time.time()

    # Look at when the remote believed it parsed the score (not
    # all envs send this currently).
    #
    # Also, if we received no new rewards, then this values is
    # None. This could indicate a high reward latency (bad,
    # uncommon), or that the agent is calling step faster than new
    # rewards are coming in (good, common).
    remote_score_now = info.get('rewarder.lag.observation.timestamp')
    if remote_score_now is not None:
        delta = remote_score_now - local_now
        info['stats.gauges.diagnostics.lag.reward'] = -(delta + clock_skew)[[1, 0]]

    # Look at when the remote send the message, so we know how
    # long it's taking for messages to get to us.
    rewarder_message_now = info.get('reward_buffer.remote_time')
    if rewarder_message_now:
        delta = rewarder_message_now - local_now
        info['stats.gauges.diagnostics.lag.rewarder_message'] = -(
            delta + clock_skew)[[1, 0]]
def write_item(self, item):
    """Serialize *item* as one JSON line and flush it to the log file.

    Unserializable dict keys are skipped; other unserializable values are
    handled by self.json_encode.
    """
    with pyprofile.push('recording.write'):
        serialized = json.dumps(item, skipkeys=True, default=self.json_encode)
        self.log_f.write(serialized + '\n')
        self.log_f.flush()
def _reset(self):
    """Reset the wrapped environment, timing the reset under the profiler."""
    with pyprofile.push('vnc_env.Timer.reset'):
        observation_n = self.env.reset()
    return observation_n
def _score(self, img):
    """Parse a score out of *img*, or None when no scorer is configured."""
    scorer = self.scorer
    if scorer is not None:
        with pyprofile.push('reward.parsing.score'):
            return scorer.score(img)
    return None
def _reset(self):
    """Time the wrapped env's reset via pyprofile and pass its result through."""
    timer = pyprofile.push('vnc_env.Timer.reset')
    with timer:
        return self.env.reset()
def do_run(self):
    """Main rewarder loop: compute a reward each frame and stream it to the agent.

    Runs forever: waits out env resets, refreshes the reward parser when
    the env or episode changes, computes the reward, sends interesting
    updates to the agent, and sleeps to hold the target frame rate.
    """
    # For debug environments which set a server-enforced time limit
    frames = 0

    # Log all the rewards, but buffer those logs rather than
    # spewing out in realtime.
    reward_logger = remote.RewardLogger()

    last_export = time.time()
    # Just make sure last_export time is set inside of pyprofile
    pyprofile.export(log=False, reset=False)

    # For tracking the framerate
    target = time.time()
    self.__vnc_last_update = target  # real initial value

    while True:
        # Atomically recover details of the env state
        env_info = self.env_status.env_info()
        env_state = env_info['env_state']

        # Hang out until it's all done resetting. We don't need to
        # reset any of our internal state here, as that happens
        # below. (We're not actually guaranteed to actually see a
        # resetting state, if the reset is very fast.)
        while env_state == 'resetting':
            logger.info('[%s] Blocking until env finishes resetting', utils.thread_name())
            env_info = self.env_status.wait_for_env_state_change(env_state)
            logger.info('[%s] Unblocking since env reset finished', utils.thread_name())
            env_state = env_info['env_state']
            # Start our frame timing from here
            target = time.time()

        env_state = env_info['env_state']
        episode_id = env_info['episode_id']
        env_id = env_info['env_id']
        fps = env_info['fps']
        assert env_state == 'running', 'Env state: {}'.format(env_state)

        if fps is not None and fps != self.fps:
            # BUG FIX: the arguments were (fps, self.fps, utils.thread_name()),
            # which mismatched the '[%s] Changing fps: %s -> %s' format string
            # (thread name belongs in the leading [%s], and old -> new order).
            logger.info('[%s] Changing fps: %s -> %s', utils.thread_name(), self.fps, fps)
            self.fps = fps

        # Detect whether the environment has reset, and thus we
        # need to clear our internal state.
        if env_id != self.env_id:
            assert episode_id != self._episode_id, 'Episode ids: {}->{}'.format(
                episode_id, self._episode_id)
            logger.info('[%s] Changing reward_parsers: %s -> %s', utils.thread_name(), self.env_id, env_id)
            # This is slow (since it has to load the scorer), so
            # we call it only in the rare case where the env ID
            # has changed.
            self.set_reward_parser(env_info)
            frames = 0
            reward_logger.reset()
        elif episode_id != self._episode_id and self.reward_parser is None:
            # Just set internal state. This is fast since it
            # doesn't actually have to load anything. Also only
            # relevant during development.
            self.set_reward_parser(env_info)
            frames = 0
            reward_logger.reset()
        elif episode_id != self._episode_id:
            # If the env state ID changed, then we need to reset
            # the reward parser.
            #
            # Not enough just to look whether the env_state is
            # resetting, since in theory we might never see the
            # resetting state for a very fast reset.
            logger.info(
                '[%s] Clearing reward_parser state: env_id=%s episode_id=%s->%s, env_state=%s',
                utils.thread_name(), env_id, self._episode_id, episode_id, env_state)
            self._episode_id = episode_id
            self.reward_parser.reset()
            frames = 0
            reward_logger.reset()

        # Recover the exact reward
        with pyprofile.push('rewarder.compute_reward'):
            reward, done, info = self.reward()
        # done=None means we're not sure if the game is over or not.
        done = bool(done)

        # Cut short the environment. Currently only used in debug environment.
        if (self.controlplane_spec is not None and
                self.controlplane_spec.server_timestep_limit is not None and
                frames >= self.controlplane_spec.server_timestep_limit):
            logger.info(
                '[%s] Marking environment as done=True since server_timestep_limit of %d frames reached',
                utils.thread_name(), frames)
            done = True

        # Add our own statistics
        if time.time() - last_export > 5:
            force_send = True
            last_export = time.time()
            profile = pyprofile.export()
            # Send the pyprofile to the agent. Info keys we set
            # will be available directly to the agent.
            info['rewarder.profile'] = profile
        else:
            force_send = False

        # Send if there's anything interesting to transmit
        if reward != 0 or done or force_send:
            if 'rewarder.profile' in info:
                # We already print the pyprofile (during the
                # export) so no need to repeat it here. It's
                # pretty big.
                display_info = info.copy()
                display_info['rewarder.profile'] = '<{} bytes>'.format(
                    len(str(display_info['rewarder.profile'])))
            else:
                display_info = info

            # BUG FIX: display_info was computed but never used — the logger
            # should record the truncated-profile copy, while the agent still
            # receives the full info below.
            reward_logger.record(reward, done, display_info)
            self.agent_conn.send_env_reward(reward, done, info, episode_id=episode_id)

        old_target = target
        # Run at the appropriate frame rate
        target += 1. / self.fps

        # Do appropriate sleeping
        delta = target - time.time()
        if done:
            # game_autoresets means the game itself will do the
            # reset, so we don't have to do any work.
            logger.info('[%s] Resetting environment since done=%s', utils.thread_name(), done)
            self.trigger_reset()
        elif delta > 0:
            pyprofile.timing('rewarder.sleep', delta)
            time.sleep(delta)
        else:
            pyprofile.timing('rewarder.sleep.missed', -delta)
            if delta < -0.1:
                logger.info(
                    '[%s] Rewarder fell behind by %ss from target; losing %s frames',
                    utils.thread_name(), -delta, int(-delta * self.fps))
            target = time.time()

        # Record the total time spent in this frame, starting from the top
        pyprofile.timing('rewarder.frame', time.time() - old_target)
        frames += 1