type=int, default=None, help='Rewarder session connection timeout (seconds)') args = parser.parse_args() logging.getLogger('gym').setLevel(logging.NOTSET) logging.getLogger('universe').setLevel(logging.NOTSET) if args.verbosity == 0: logger.setLevel(logging.INFO) elif args.verbosity >= 1: logger.setLevel(logging.DEBUG) if args.env_id is not None: env = gym.make(args.env_id) else: env = wrappers.WrappedVNCEnv() # env = wrappers.BlockingReset(env) if not isinstance(env, wrappers.GymCoreAction): # The GymCoreSyncEnv's try to mimic their core counterparts, # and thus came pre-wrapped wth an action space # translator. Everything else probably wants a SafeActionSpace # wrapper to shield them from random-agent clicking around # everywhere. env = wrappers.experimental.SafeActionSpace(env) else: # Only gym-core are seedable env.seed([0]) env = wrappers.Logger(env) if args.monitor: env = wrappers.Monitor(env, '/tmp/vnc_random_agent', force=True)
def main(): # You can optionally set up the logger. Also fine to set the level # to logging.DEBUG or logging.WARN if you want to change the # amount of output. logger.setLevel(logging.INFO) universe.configure_logging() parser = argparse.ArgumentParser(description=None) parser.add_argument('-e', '--env_id', default='gtav.SaneDriving-v0', help='Which environment to run on.') parser.add_argument('-m', '--monitor', action='store_false', help='Whether to activate the monitor.') parser.add_argument('-r', '--remote', help='The number of environments to create (e.g. -r 20), or the address of pre-existing VNC servers and rewarders to use (e.g. -r vnc://localhost:5900+15900,localhost:5901+15901), or a query to the allocator (e.g. -r http://allocator.sci.openai-tech.com?n=2)') parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.') parser.add_argument('-R', '--no-render', action='store_true', help='Do not render the environment locally.') parser.add_argument('-f', '--fps', default=8., type=float, help='Desired frames per second') parser.add_argument('-N', '--max-steps', type=int, default=10**7, help='Maximum number of steps to take') parser.add_argument('-d', '--driver', default='DeepDriver', help='Choose your driver') parser.add_argument('-c', '--custom_camera', action='store_true', help='Customize the GTA camera') args = parser.parse_args() logging.getLogger('gym').setLevel(logging.NOTSET) logging.getLogger('universe').setLevel(logging.NOTSET) if args.verbosity == 0: logger.setLevel(logging.INFO) elif args.verbosity >= 1: logger.setLevel(logging.DEBUG) if args.env_id is not None: # N.B. This does not set the actual environment mode yet, which # is currently driven by environment itself. env = gym.make(args.env_id) else: env = wrappers.WrappedVNCEnv() if not isinstance(env, wrappers.GymCoreAction): # The GymCoreSyncEnv's try to mimic their core counterparts, # and thus came pre-wrapped wth an action space # translator. Everything else probably wants a SafeActionSpace # wrapper to shield them from random-agent clicking around # everywhere. env = wrappers.SafeActionSpace(env) else: # Only gym-core are seedable env.seed([0]) env = wrappers.Logger(env) env.configure( fps=args.fps, # print_frequency=None, # ignore_clock_skew=True, remotes=args.remote, vnc_driver='go', vnc_kwargs={ 'encoding': 'tight', 'compress_level': 0, 'fine_quality_level': 50, 'subsample_level': 0, 'quality_level': 5, }, ) if args.driver == 'DeepDriver': driver = DeepDriver() elif args.driver == 'DeepDriverTF': driver = DeepDriverTF() else: raise Exception('That driver is not available') driver.setup() if args.monitor: # env.monitor.start('/tmp/vnc_random_agent', force=True, video_callable=lambda i: True) wrappers.Monitor(env, '/tmp/vnc_random_agent', video_callable=False, force=True) render = not args.no_render observation_n = env.reset() reward_n = [0] * env.n done_n = [False] * env.n info = None for i in range(args.max_steps): if render: # Note the first time you call render, it'll be relatively # slow and you'll have some aggregated rewards. We could # open the render() window before `reset()`, but that's # confusing since it pops up a black window for the # duration of the reset. env.render() action_n = driver.step(observation_n, reward_n, done_n, info) try: if info is not None: distance = info['n'][0]['distance_from_destination'] logger.info('distance %s', distance) except KeyError as e: logger.debug('distance not available %s', str(e)) if args.custom_camera: # Sending this every step is probably overkill for action in action_n: action.append(GTASetting('use_custom_camera', True)) # Take an action with pyprofile.push('env.step'): _step = env.step(action_n) observation_n, reward_n, done_n, info = _step if any(done_n) and info and not any(info_n.get('env_status.artificial.done', False) for info_n in info['n']): print('done_n', done_n, 'i', i) logger.info('end of episode') env.reset() # We're done! clean up env.close()