def test_vnc_env():
    """Step a fake-backed DuskDrive env through the reset handshake.

    The env is configured with the FakeVNCSession / FakeRewarder drivers and
    rewarder messages are injected by hand through ``_manual_recv``. The test
    checks what ``step`` reports before the reset reply, after it, and once
    the env is described as running with three rewards queued.
    """
    env = wrappers.Unvectorize(gym.make('flashgames.DuskDrive-v0'))
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    env.reset()

    client = get_rewarder_client(env)

    def describe(state):
        # Inject an env.describe message for episode 1 with the given state.
        client._manual_recv(
            'v0.env.describe',
            {
                'env_id': 'flashgames.DuskDrive-v0',
                'env_state': state,
                'fps': 60
            },
            {'episode_id': '1'},
        )

    def send_reward(amount):
        # Inject a non-terminal reward message for episode 1.
        client._manual_recv(
            'v0.env.reward',
            {
                'reward': amount,
                'done': False,
                'info': {}
            },
            {'episode_id': '1'},
        )

    def press_a():
        # Step the env with a single "a key down" event.
        return env.step([spaces.KeyEvent.by_name('a', down=True)])

    def status(step_info):
        # The (env_state, episode_id) pair reported in a step's info dict.
        return (step_info['env_status.env_state'],
                step_info['env_status.episode_id'])

    # Described as resetting, but the reset reply has not arrived yet.
    describe('resetting')
    obs, reward, done, info = press_a()
    assert (obs, reward, done) + status(info) == (None, 0, False, None, None)

    # Reset reply received: the status is now visible, still no observation.
    client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})
    obs, reward, done, info = press_a()
    assert (obs, reward, done) + status(info) == (None, 0, False, 'resetting',
                                                  '1')

    # Running, with three rewards queued before the next step.
    describe('running')
    for amount in (10, 15, -3):
        send_reward(amount)

    obs, reward, done, info = press_a()
    assert sorted(obs.keys()) == ['text', 'vision']
    assert obs['text'] == []
    assert obs['vision'].shape == (768, 1024, 3)
    assert (reward, done) + status(info) == (22, False, 'running', '1')
    assert info['stats.reward.count'] == 3
def create_dd_env(remotes=None, client_id=None):
    """Build and configure a wrapped DuskDrive environment.

    The base env must report ``runtime.vectorized`` in its metadata. It is
    then wrapped, innermost first, with Logger, BlockingReset, Vision,
    EpisodeID and Unvectorize, and configured at fps=10 with observer=True.

    Args:
        remotes: Forwarded to ``env.configure`` as ``remotes``.
        client_id: Forwarded to ``env.configure`` as ``client_id``.

    Returns:
        The wrapped, configured environment.
    """
    wrapped = gym.make('flashgames.DuskDrive-v0')
    assert wrapped.metadata['runtime.vectorized']

    # Order matters: each wrapper wraps the result of the previous one.
    wrapper_chain = (
        wrappers.Logger,
        wrappers.BlockingReset,
        wrappers.Vision,
        wrappers.EpisodeID,
        wrappers.Unvectorize,
    )
    for wrap in wrapper_chain:
        wrapped = wrap(wrapped)

    wrapped.configure(
        remotes=remotes,
        fps=10,
        observer=True,
        client_id=client_id,
    )
    return wrapped
def test_smoke(env_id):
    """Smoke test: the environment starts without errors and a rollout
    yields rewards and observations.

    Args:
        env_id: Id passed to ``gym.make`` to create the environment under test.
    """
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    env = wrappers.Unvectorize(gym.make(env_id))

    # FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES is used to test universe-envs in CI.
    force_latest = os.environ.get('FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES')
    if not force_latest:
        env.configure(remotes=1)
    else:
        configure_with_latest_docker_runtime_tag(env)

    env.reset()

    # Check a rollout
    rollout_steps = 60 * 30
    _rollout(env, timestep_limit=rollout_steps)
def test_peek():
    """Check the info reported by ``PeekReward`` steps across an episode change.

    With the fake drivers, episode 1 is reset and then episode 2 is described.
    PeekReward steps are expected to keep ``env_status.*`` on episode 1 while
    ``env_status.peek.*`` tracks episode 2. The masked flags are expected to
    be set while episode 2 is resetting and cleared once it is running.
    """
    env = wrappers.Unvectorize(gym.make('flashgames.DuskDrive-v0'))
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    env.reset()

    client = get_rewarder_client(env)

    def describe(state, episode):
        # Inject an env.describe message for the given state and episode.
        client._manual_recv(
            'v0.env.describe',
            {
                'env_id': 'flashgames.DuskDrive-v0',
                'env_state': state,
                'fps': 60
            },
            {'episode_id': episode},
        )

    def peek():
        # Step with a PeekReward action and hand back only the info dict.
        _observation, _reward, _done, step_info = env.step(
            [spaces.PeekReward])
        return step_info

    def check_status(step_info, expected_pairs):
        # Assert each (key, value) pair against the step's info dict, in order.
        for key, expected in expected_pairs:
            assert step_info[key] == expected

    # Episode 1: resetting, then the reset reply, then one peek.
    describe('resetting', '1')
    client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})
    peek()

    # Episode 2 is announced as resetting.
    describe('resetting', '2')
    info = peek()
    assert info['mask.masked.observation']
    assert info['mask.masked.action']
    check_status(info, (
        ('env_status.episode_id', '1'),
        ('env_status.env_state', 'resetting'),
        ('env_status.peek.episode_id', '2'),
        ('env_status.peek.env_state', 'resetting'),
    ))

    # Episode 2 starts running.
    describe('running', '2')
    info = peek()
    assert not info.get('mask.masked.observation')
    assert not info.get('mask.masked.action')
    check_status(info, (
        ('env_status.episode_id', '1'),
        ('env_status.env_state', 'resetting'),
        ('env_status.peek.episode_id', '2'),
        ('env_status.peek.env_state', 'running'),
    ))
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    """Check that the VNC-backed env and the raw gym env behave the same.

    Args:
        spec: Env spec; ``spec.make()`` builds the VNC env and
            ``spec._kwargs['gym_core_id']`` names the raw gym env.
        matcher: Passed through to the ``reset`` / ``rollout`` helpers.
        wrapper: Callable applied to the VNC env before it is unvectorized.
    """
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    spaces.seed(0)

    remote_env = spec.make()
    needs_configure = remote_env.metadata.get('configure.required', False)
    if needs_configure:
        remote_env.configure(remotes=1)
    remote_env = wrappers.Unvectorize(wrapper(remote_env))

    raw_env = gym.make(spec._kwargs['gym_core_id'])

    def seed_both(value):
        # Seed the raw env first, then the VNC env, with the same value.
        raw_env.seed(value)
        remote_env.seed(value)

    seed_both(0)

    # Check that reset observations work
    reset(matcher, raw_env, remote_env, stage='initial reset')

    # Check a full rollout
    rollout(matcher, raw_env, remote_env, timestep_limit=50, stage='50 steps')

    # Reset to start a new episode
    reset(matcher, raw_env, remote_env, stage='reset to new episode')

    # Check that a step into the next episode works
    rollout(matcher,
            raw_env,
            remote_env,
            timestep_limit=1,
            stage='1 step in new episode')

    # Make sure env can be reseeded
    seed_both(1)
    reset(matcher, raw_env, remote_env, 'reseeded reset')
    rollout(matcher,
            raw_env,
            remote_env,
            timestep_limit=1,
            stage='reseeded step')
def test_boundary_simple():
    """Check a single step that spans one episode boundary.

    Episode 1 receives rewards 1 and 2 (the second with done=True), then
    episode 2 is described as resetting. The following step is expected to
    report reward 3 with done=True, masked observation/action flags, and the
    status of episode 2.
    """
    env = wrappers.Unvectorize(gym.make('flashgames.DuskDrive-v0'))
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    env.reset()

    client = get_rewarder_client(env)

    def describe_resetting(episode):
        # Inject an env.describe message marking the episode as resetting.
        client._manual_recv(
            'v0.env.describe',
            {
                'env_id': 'flashgames.DuskDrive-v0',
                'env_state': 'resetting',
                'fps': 60
            },
            {'episode_id': episode},
        )

    def send_reward(amount, finished):
        # Inject a reward message for episode 1.
        client._manual_recv(
            'v0.env.reward',
            {
                'reward': amount,
                'done': finished,
                'info': {}
            },
            {'episode_id': '1'},
        )

    describe_resetting('1')
    client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})
    send_reward(1, False)
    send_reward(2, True)
    describe_resetting('2')

    # We have reward of 3 for episode 1, and episode 2 should now be resetting
    _observation, reward, done, info = env.step([])
    assert info['mask.masked.observation']
    assert info['mask.masked.action']

    actual = (reward, done, info['env_status.env_state'],
              info['env_status.episode_id'])
    assert actual == (3, True, 'resetting', '2')
def test_boundary_multiple():
    """Check stepping when several episode boundaries are queued at once.

    Messages for episodes 2, 3 and 4 are injected before any step. The first
    step is expected to return episode 2's reward with done=True while the
    status already points at running episode 4, and to expose episode 2 under
    ``env_status.complete.*``. The second step is expected to return
    episode 4's reward.
    """
    env = wrappers.Unvectorize(gym.make('flashgames.DuskDrive-v0'))
    env.configure(
        vnc_driver=FakeVNCSession,
        rewarder_driver=FakeRewarder,
        remotes='vnc://example.com:5900+15900',
    )
    env.reset()

    client = get_rewarder_client(env)

    def describe(state, episode):
        # Inject an env.describe message for the given state and episode.
        client._manual_recv(
            'v0.env.describe',
            {
                'env_id': 'flashgames.DuskDrive-v0',
                'env_state': state,
                'fps': 60
            },
            {'episode_id': episode},
        )

    def send_reward(amount, finished, episode):
        # Inject a reward message for the given episode.
        client._manual_recv(
            'v0.env.reward',
            {
                'reward': amount,
                'done': finished,
                'info': {}
            },
            {'episode_id': episode},
        )

    def current_status(step_info):
        # The (env_state, episode_id) pair reported in a step's info dict.
        return (step_info['env_status.env_state'],
                step_info['env_status.episode_id'])

    # episode 2 (the only one that also gets a reset reply)
    describe('resetting', '2')
    client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '2'})
    describe('running', '2')
    send_reward(2, True, '2')

    # episode 3, then episode 4
    for episode, amount, finished in (('3', 3, True), ('4', 4, False)):
        describe('resetting', episode)
        describe('running', episode)
        send_reward(amount, finished, episode)

    _observation, reward, done, info = env.step([])
    assert not info.get('mask.masked.observation')
    assert not info.get('mask.masked.action')
    assert (reward, done) + current_status(info) == (2, True, 'running', '4')
    completed = (info['env_status.complete.env_state'],
                 info['env_status.complete.episode_id'])
    assert completed == ('running', '2')

    _observation, reward, done, info = env.step([])
    assert (reward, done) + current_status(info) == (4, False, 'running', '4')
def main():
    """Entry point for the control plane process.

    Parses command-line flags, optionally launches a demonstration agent
    subprocess, starts the agent connection listener, the env controller and
    (unless ``--no-rewarder`` is given) the rewarder, installs a manhole, and
    then blocks on the shared error buffer.

    Returns:
        0 once ``error_buffer.blocking_check`` raises ``remote.Exit``. Any
        other exception raised by the check propagates to the caller.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        dest='verbosity',
                        default=0,
                        help='Set verbosity.')
    parser.add_argument('-r',
                        '--remotes',
                        default='vnc://127.0.0.1:5900',
                        help='Which VNC address to connect to.')
    parser.add_argument(
        '-e',
        '--env-id',
        default=None,
        help=
        'An env ID to optionally run upon startup (e.g. flashgames.DuskDrive-v0).'
    )
    # The --no-* switches are store_true flags, so their help text describes
    # what passing the flag turns off.
    parser.add_argument('-V',
                        '--no-vexpect',
                        action='store_true',
                        help='Disable vexpect.')
    parser.add_argument('-S',
                        '--no-scorer',
                        action='store_true',
                        help='Disable the scorer.')
    parser.add_argument(
        '-E',
        '--no-env',
        action='store_true',
        help=
        'Do not maintain an environment (disables reward and gameover parsing).'
    )
    parser.add_argument('-I',
                        '--integrator-mode',
                        action='store_true',
                        help='Run in integrator mode (implies --no-vexpect).')
    parser.add_argument('-R',
                        '--no-rewarder',
                        action='store_true',
                        help='Do not start the rewarder thread.')
    parser.add_argument('--rewarder-port',
                        type=int,
                        default=15900,
                        help='Which port the agent_conn listens on.')
    parser.add_argument('--rewarder-fps',
                        default=60,
                        type=float,
                        help='The frame rate for the rewarder.')
    parser.add_argument(
        '-i',
        '--idle-timeout',
        type=float,
        help=
        'How long to keep the environment around when it has no active connections'
    )
    parser.add_argument(
        '--demonstration',
        action='store_true',
        help='Run a demonstration agent (requires --env-id).')
    parser.add_argument(
        '--bot-demonstration',
        action='store_true',
        help=
        'Run a demonstration agent that connects to the vnc_recorder port, to record complete demos with no human playing (requires --env-id).'
    )
    args = parser.parse_args()

    # TODO: only activate in dev
    # Hard-exit with status 10 on Ctrl-C, without running any cleanup.
    signal.signal(signal.SIGINT, lambda signum, frame: os._exit(10))

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    # Launch demonstration agent if requested. Both modes need an env id;
    # without one the request is ignored. The env id is shell-quoted because
    # the command is run through the shell for the `| sed` prefixing pipe.
    if args.bot_demonstration and args.env_id is not None:
        cmd = "/app/universe-envs/controlplane/bin/demonstration_agent.py -e {} -r vnc://localhost:5899+15899 2>&1 | sed -e 's/^/[demonstration_agent] /'".format(
            pipes.quote(args.env_id))
        logger.info('Launching demonstration agent in bot mode: %s', cmd)
        subprocess.Popen(cmd, shell=True)
    elif args.demonstration and args.env_id is not None:
        cmd = "/app/universe-envs/controlplane/bin/demonstration_agent.py -e {} 2>&1 | sed -e 's/^/[demonstration_agent] /'".format(
            pipes.quote(args.env_id))
        logger.info('Launching demonstration agent: %s', cmd)
        subprocess.Popen(cmd, shell=True)

    # NOTE(review): this logs the full process environment, which may include
    # secrets -- confirm that is acceptable wherever these logs are shipped.
    logger.info(
        "Starting play_controlplane.py with the following: command=%s args=%s env=%s",
        sys.argv, args, os.environ)

    error_buffer = universe.utils.ErrorBuffer()

    env_status = universe.rewarder.EnvStatus()
    env_status.set_env_info(env_id=args.env_id, fps=args.rewarder_fps)

    # The condition variable is shared by the control buffer and agent_conn.
    cv = threading.Condition()
    control_buffer = remote.ControlBuffer(cv)
    agent_conn = remote.AgentConn(env_status,
                                  cv,
                                  control_buffer,
                                  error_buffer=error_buffer,
                                  idle_timeout=args.idle_timeout)
    agent_conn.listen(port=args.rewarder_port)

    # Logger gives us the diagnostics printing
    if not args.no_env:
        env = wrappers.Unvectorize(
            wrappers.Vision(wrappers.Logger(vnc_env.VNCEnv())))
        # Assert when given self-referential rewarder connection
        # This shows up as a '+15900' or similar port number in the remotes string
        assert '+' not in args.remotes, "Remotes may not have rewarder ports"
        env.configure(
            remotes=args.remotes,
            ignore_clock_skew=True,
            disable_action_probes=True,
            vnc_driver='go',
            vnc_kwargs={
                'encoding': 'zrle',
                'compress_level': 9
            },
            observer=True,
        )
    else:
        logger.info(
            'Running without environment, meaning reward and gameover parsing will be disabled'
        )
        env = None

    # Integrator mode also switches vexpect off.
    no_vexpect = args.no_vexpect or args.integrator_mode

    env_controller = EnvController(
        env,
        args.remotes,
        env_status,
        agent_conn,
        error_buffer=error_buffer,
        control_buffer=control_buffer,
        no_vexpect=no_vexpect,
        integrator_mode=args.integrator_mode,
    )
    env_controller.start()

    if not args.no_rewarder:
        rewarder = Rewarder(
            env,
            args.remotes,
            agent_conn,
            env_status=env_controller.env_status,
            trigger_reset=env_controller.trigger_reset,
            error_buffer=error_buffer,
            no_vexpect=no_vexpect,
            no_scorer=args.no_scorer,
        )
        rewarder.start()
    else:
        rewarder = None

    manhole.install(locals={
        'rewarder': rewarder,
        'env_controller': env_controller
    })

    # TODO: clean up this API, but good enough for now
    # Loop until the error buffer signals a clean exit. The loop has no
    # `break`, so the only ways out are the return below or an exception
    # propagating from blocking_check.
    while True:
        try:
            error_buffer.blocking_check(timeout=60)
        except remote.Exit as e:
            logger.info('%s', e)
            return 0
dest='verbosity', default=0, help='Set verbosity.') args = parser.parse_args() if args.verbosity == 0: logger.setLevel(logging.INFO) elif args.verbosity >= 1: logger.setLevel(logging.DEBUG) # Record episodes as we go episode_scores = [] current_episode_score = 0. rewarded_during_current_episode = False while True: env = wrappers.Unvectorize(gym.make(args.env_id)) # Jot down the env_id so the uploader can find it later env_id_file_dir = os.path.join(os.sep, 'tmp', 'demo') env_id_file_path = os.path.join(env_id_file_dir, 'env_id.txt') if not os.path.exists(env_id_file_dir): logger.info("[DemonstrationAgent] Creating directory %s", env_id_file_dir) os.makedirs(env_id_file_dir) with open(env_id_file_path, 'w') as env_id_file: logger.info("[DemonstrationAgent] Writing env id to file %s", env_id_file_path) env_id_file.write(args.env_id) # Connect through our recording proxies env.configure(remotes=args.remote, fps=args.fps, observer=True)