def create_dd_env(remotes=None, client_id=None):
    """Create, wrap and configure the 'flashgames.DuskDrive-v0' environment.

    The freshly made environment must report a truthy
    ``'runtime.vectorized'`` entry in its metadata.  It is then wrapped, in
    order, with Logger, BlockingReset, Vision, EpisodeID and Unvectorize, and
    configured at 10 fps with ``observer=True``.

    Args:
        remotes: Passed through unchanged to ``env.configure``.
        client_id: Passed through unchanged to ``env.configure``.

    Returns:
        The fully wrapped and configured environment.
    """
    dusk_drive = gym.make('flashgames.DuskDrive-v0')
    assert dusk_drive.metadata['runtime.vectorized']

    # Innermost wrapper first; each one wraps the result of the previous.
    wrapper_stack = (
        wrappers.Logger,
        wrappers.BlockingReset,
        wrappers.Vision,
        wrappers.EpisodeID,
        wrappers.Unvectorize,
    )
    wrapped = dusk_drive
    for wrap in wrapper_stack:
        wrapped = wrap(wrapped)

    wrapped.configure(
        remotes=remotes,
        fps=10,
        observer=True,
        client_id=client_id,
    )
    return wrapped
if args.env_id is not None: env = gym.make(args.env_id) else: env = wrappers.WrappedVNCEnv() # env = wrappers.BlockingReset(env) if not isinstance(env, wrappers.GymCoreAction): # The GymCoreSyncEnv's try to mimic their core counterparts, # and thus came pre-wrapped wth an action space # translator. Everything else probably wants a SafeActionSpace # wrapper to shield them from random-agent clicking around # everywhere. env = wrappers.experimental.SafeActionSpace(env) else: # Only gym-core are seedable env.seed([0]) env = wrappers.Logger(env) if args.monitor: env = wrappers.Monitor(env, '/tmp/vnc_random_agent', force=True) if args.actions == 'random': action_space = env.action_space elif args.actions == 'noop': action_space = NoopSpace() elif args.actions == 'forward': action_space = ForwardSpace() elif args.actions == 'click': spec = universe.runtime_spec('flashgames').server_registry[args.env_id] height = spec["height"] width = spec["width"] noclick_regions = [
def main():
    """Run a driver agent against a universe environment from the command line.

    Parses the CLI options, builds and configures the environment, creates
    the driver chosen with ``--driver`` and then steps the environment for at
    most ``--max-steps`` iterations, feeding every observation to the driver.

    The environment is always closed on the way out, including when the
    driver or the environment raises.

    Raises:
        Exception: if ``--driver`` names a driver other than 'DeepDriver' or
            'DeepDriverTF'.
    """
    # You can optionally set up the logger. Also fine to set the level
    # to logging.DEBUG or logging.WARN if you want to change the
    # amount of output.
    logger.setLevel(logging.INFO)
    universe.configure_logging()

    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-e', '--env_id', default='gtav.SaneDriving-v0', help='Which environment to run on.')
    # NOTE(review): 'store_false' means monitoring defaults to ON and passing
    # -m turns it OFF -- confirm this is the intended CLI behaviour.
    parser.add_argument('-m', '--monitor', action='store_false', help='Whether to activate the monitor.')
    parser.add_argument('-r', '--remote', help='The number of environments to create (e.g. -r 20), or the address of pre-existing VNC servers and rewarders to use (e.g. -r vnc://localhost:5900+15900,localhost:5901+15901), or a query to the allocator (e.g. -r http://allocator.sci.openai-tech.com?n=2)')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    parser.add_argument('-R', '--no-render', action='store_true', help='Do not render the environment locally.')
    parser.add_argument('-f', '--fps', default=8., type=float, help='Desired frames per second')
    parser.add_argument('-N', '--max-steps', type=int, default=10**7, help='Maximum number of steps to take')
    parser.add_argument('-d', '--driver', default='DeepDriver', help='Choose your driver')
    parser.add_argument('-c', '--custom_camera', action='store_true', help='Customize the GTA camera')
    args = parser.parse_args()

    logging.getLogger('gym').setLevel(logging.NOTSET)
    logging.getLogger('universe').setLevel(logging.NOTSET)
    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    if args.env_id is not None:
        # N.B. This does not set the actual environment mode yet, which
        # is currently driven by environment itself.
        env = gym.make(args.env_id)
    else:
        env = wrappers.WrappedVNCEnv()

    if not isinstance(env, wrappers.GymCoreAction):
        # The GymCoreSyncEnv's try to mimic their core counterparts,
        # and thus came pre-wrapped with an action space
        # translator. Everything else probably wants a SafeActionSpace
        # wrapper to shield them from random-agent clicking around
        # everywhere.
        env = wrappers.SafeActionSpace(env)
    else:
        # Only gym-core are seedable
        env.seed([0])
    env = wrappers.Logger(env)

    if args.monitor:
        # The wrapper must replace `env`; otherwise the environment that is
        # actually stepped below is never monitored. It is applied before
        # configure() so that configure() is routed through it as well.
        env = wrappers.Monitor(env, '/tmp/vnc_random_agent', video_callable=False, force=True)

    env.configure(
        fps=args.fps,
        # print_frequency=None,
        # ignore_clock_skew=True,
        remotes=args.remote,
        vnc_driver='go',
        vnc_kwargs={
            'encoding': 'tight',
            'compress_level': 0,
            'fine_quality_level': 50,
            'subsample_level': 0,
            'quality_level': 5,
        },
    )

    try:
        if args.driver == 'DeepDriver':
            driver = DeepDriver()
        elif args.driver == 'DeepDriverTF':
            driver = DeepDriverTF()
        else:
            raise Exception('That driver is not available')

        driver.setup()

        render = not args.no_render
        observation_n = env.reset()
        reward_n = [0] * env.n
        done_n = [False] * env.n
        info = None

        for i in range(args.max_steps):
            if render:
                # Note the first time you call render, it'll be relatively
                # slow and you'll have some aggregated rewards. We could
                # open the render() window before `reset()`, but that's
                # confusing since it pops up a black window for the
                # duration of the reset.
                env.render()

            action_n = driver.step(observation_n, reward_n, done_n, info)

            # Report progress when the environment exposes it; the key is
            # optional, so its absence is only logged at debug level.
            try:
                if info is not None:
                    distance = info['n'][0]['distance_from_destination']
                    logger.info('distance %s', distance)
            except KeyError as e:
                logger.debug('distance not available %s', str(e))

            if args.custom_camera:
                # Sending this every step is probably overkill
                for action in action_n:
                    action.append(GTASetting('use_custom_camera', True))

            # Take an action
            with pyprofile.push('env.step'):
                _step = env.step(action_n)
                observation_n, reward_n, done_n, info = _step

            # Only treat the episode as finished when no sub-environment
            # flags the "done" as artificial.
            if any(done_n) and info and not any(info_n.get('env_status.artificial.done', False) for info_n in info['n']):
                print('done_n', done_n, 'i', i)
                logger.info('end of episode')
                env.reset()
    finally:
        # We're done (or something failed)! clean up
        env.close()
def main():
    """Entry point for the control plane: start the env controller and rewarder.

    Parses the CLI options, optionally launches a demonstration agent as a
    subprocess, starts the agent connection listener, builds the (optional)
    VNC environment, and starts the EnvController and (unless disabled) the
    Rewarder.  It then blocks, polling the shared error buffer.

    Returns:
        0 once ``remote.Exit`` is raised by the error buffer.  Any other
        exception raised by the error buffer check propagates to the caller.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0, help='Set verbosity.')
    parser.add_argument('-r', '--remotes', default='vnc://127.0.0.1:5900', help='Which VNC address to connect to.')
    parser.add_argument(
        '-e', '--env-id', default=None,
        help='An env ID to optionally run upon startup (e.g. flashgames.DuskDrive-v0).')
    parser.add_argument('-V', '--no-vexpect', action='store_true', help='Whether to use vexpect.')
    parser.add_argument('-S', '--no-scorer', action='store_true', help='Whether to use the scorer.')
    parser.add_argument('-E', '--no-env', action='store_true', help='Whether to maintain an environment.')
    # The help text used to be a copy of the --no-vexpect one; this flag is
    # forwarded to the EnvController and additionally disables vexpect.
    parser.add_argument('-I', '--integrator-mode', action='store_true',
                        help='Run in integrator mode (implies --no-vexpect).')
    parser.add_argument('-R', '--no-rewarder', action='store_true', help='Whether to enable the rewarder thread at all.')
    parser.add_argument('--rewarder-port', type=int, default=15900, help='Which port to start the agent_conn thread')
    parser.add_argument('--rewarder-fps', default=60, type=float, help='The frame rate for the rewarder.')
    parser.add_argument(
        '-i', '--idle-timeout', type=float,
        help='How long to keep the environment around when it has no active connections')
    parser.add_argument('--demonstration', action='store_true', help='Run a demonstration agent.')
    parser.add_argument(
        '--bot-demonstration', action='store_true',
        help='Run a demonstrationa agent that connects to the vnc_recorder port, to record complete demos with no human playing')
    args = parser.parse_args()

    # TODO: only activate in dev
    signal.signal(signal.SIGINT, lambda signal, frame: os._exit(10))

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    # Launch demonstration agent if requested. Both modes need an env ID;
    # say so instead of silently ignoring the flag when it is missing.
    if (args.bot_demonstration or args.demonstration) and args.env_id is None:
        logger.warning('A demonstration agent was requested but no --env-id was given; not launching it')
    elif args.bot_demonstration:
        cmd = "/app/universe-envs/controlplane/bin/demonstration_agent.py -e {} -r vnc://localhost:5899+15899 2>&1 | sed -e 's/^/[demonstration_agent] /'".format(
            pipes.quote(args.env_id))
        logger.info('Launching demonstration agent in bot mode: %s', cmd)
        subprocess.Popen(cmd, shell=True)
    elif args.demonstration:
        cmd = "/app/universe-envs/controlplane/bin/demonstration_agent.py -e {} 2>&1 | sed -e 's/^/[demonstration_agent] /'".format(
            pipes.quote(args.env_id))
        logger.info('Launching demonstration agent: %s', cmd)
        subprocess.Popen(cmd, shell=True)

    logger.info(
        "Starting play_controlplane.py with the following: command=%s args=%s env=%s",
        sys.argv, args, os.environ)

    error_buffer = universe.utils.ErrorBuffer()

    env_status = universe.rewarder.EnvStatus()
    env_status.set_env_info(env_id=args.env_id, fps=args.rewarder_fps)

    cv = threading.Condition()
    control_buffer = remote.ControlBuffer(cv)
    agent_conn = remote.AgentConn(env_status, cv, control_buffer,
                                  error_buffer=error_buffer,
                                  idle_timeout=args.idle_timeout)
    agent_conn.listen(port=args.rewarder_port)

    # Logger gives us the diagnostics printing
    if not args.no_env:
        env = wrappers.Unvectorize(
            wrappers.Vision(wrappers.Logger(vnc_env.VNCEnv())))
        # Assert when given self-referential rewarder connection
        # This shows up as a '+15900' or similar port number in the remotes string
        assert '+' not in args.remotes, "Remotes may not have rewarder ports"
        env.configure(
            remotes=args.remotes,
            ignore_clock_skew=True,
            disable_action_probes=True,
            vnc_driver='go',
            vnc_kwargs={
                'encoding': 'zrle',
                'compress_level': 9
            },
            observer=True,
        )
    else:
        logger.info(
            'Running without environment, meaning reward and gameover parsing will be disabled'
        )
        env = None

    # Integrator mode also turns vexpect off.
    no_vexpect = args.no_vexpect or args.integrator_mode

    env_controller = EnvController(
        env,
        args.remotes,
        env_status,
        agent_conn,
        error_buffer=error_buffer,
        control_buffer=control_buffer,
        no_vexpect=no_vexpect,
        integrator_mode=args.integrator_mode,
    )
    env_controller.start()

    if not args.no_rewarder:
        rewarder = Rewarder(
            env,
            args.remotes,
            agent_conn,
            env_status=env_controller.env_status,
            trigger_reset=env_controller.trigger_reset,
            error_buffer=error_buffer,
            no_vexpect=no_vexpect,
            no_scorer=args.no_scorer,
        )
        rewarder.start()
    else:
        rewarder = None

    manhole.install(locals={
        'rewarder': rewarder,
        'env_controller': env_controller
    })

    # TODO: clean up this API, but good enough for now
    # The loop only ends through remote.Exit (clean shutdown) or through
    # another exception propagating out of blocking_check.
    while True:
        try:
            error_buffer.blocking_check(timeout=60)
        except remote.Exit as e:
            logger.info('%s', e)
            return 0