def test_connect():
    env = gym.make('flashgames.DuskDrive-v0')
    env.configure(vnc_driver=FakeVNCSession, rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    vnc_session = get_vnc_session(env)
    rewarder_session = get_rewarder_session(env)
    assert vnc_session._to_dict() == {
        '0': {
            'name': '0',
            'subsample_level': 2,
            'encoding': 'tight',
            'fine_quality_level': 50,
            'start_timeout': 7,
            'address': 'example.com:5900',
            'password': '******',
        }
    }
    assert rewarder_session._to_dict() == {
        '0': {
            'start_timeout': 7,
            'seed': None,
            'name': '0',
            'fps': 60,
            'address': 'example.com:15900',
            'env_id': 'flashgames.DuskDrive-v0',
            'password': '******',
            'skip_network_calibration': False,
            'observer': False,
            'label': '0:example.com:5900',
        }
    }
def main():
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity',
                        default=0, help='Set verbosity.')
    args = parser.parse_args()

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    env = gym.make('wob.mini.ClickDialog-v0')
    env = jiminy.wrappers.experimental.SoftmaxClickMouse(env)
    env = wob_vnc.MiniWoBCropper(env)
    # Automatically creates a local docker container
    wob_vnc.configure(env, wob_vnc.remotes_url(port_ofs=0, hostname='0.0.0.0',
                                               count=REMOTES_COUNT))

    observation_n = env.reset()
    idx = 0
    while True:
        # Your agent here.
        # Try sending this instead of a random action: ('KeyEvent', 'ArrowUp', True)
        action_n = [env.action_space.sample() for ob in observation_n]
        observation_n, reward_n, done_n, info = env.step(action_n)
        print("idx: {}, reward: {}".format(idx * REMOTES_COUNT, reward_n))
        idx += 1
    return 0
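# A minimal sketch of the constant-action alternative suggested by the comment
# in the loop above, assuming the universe-style VNC protocol where each
# remote's action is a list of raw event tuples. The helper name is
# illustrative and not part of the original source:
def arrow_up_actions(observation_n):
    # Press ArrowUp on every remote instead of sampling randomly
    return [[('KeyEvent', 'ArrowUp', True)] for _ in observation_n]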
def test_describe_handling():
    env = gym.make('flashgames.DuskDrive-v0')
    env.configure(vnc_driver=FakeVNCSession, rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()

    reward_buffer = get_reward_buffer(env)
    rewarder_client = get_rewarder_client(env)
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60,
    }, {'episode_id': '1'})
    assert reward_buffer._remote_episode_id == '1'
    assert reward_buffer._remote_env_state == 'resetting'
    assert reward_buffer._current_episode_id is None
    assert reward_buffer.reward_state(reward_buffer._current_episode_id)._env_state is None

    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})
    assert reward_buffer._remote_episode_id == '1'
    assert reward_buffer._remote_env_state == 'resetting'
    assert reward_buffer._current_episode_id == '1'
    assert reward_buffer.reward_state(reward_buffer._current_episode_id)._env_state == 'resetting'
def test_vnc_env():
    env = gym.make('flashgames.DuskDrive-v0')
    env = wrappers.Unvectorize(env)
    env.configure(vnc_driver=FakeVNCSession, rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()

    rewarder_client = get_rewarder_client(env)
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60,
    }, {'episode_id': '1'})
    observation, reward, done, info = env.step([spaces.KeyEvent.by_name('a', down=True)])
    assert (observation, reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (None, 0, False, None, None)

    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})
    observation, reward, done, info = env.step([spaces.KeyEvent.by_name('a', down=True)])
    assert (observation, reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (None, 0, False, 'resetting', '1')

    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'running',
        'fps': 60,
    }, {'episode_id': '1'})
    rewarder_client._manual_recv('v0.env.reward', {'reward': 10, 'done': False, 'info': {}},
                                 {'episode_id': '1'})
    rewarder_client._manual_recv('v0.env.reward', {'reward': 15, 'done': False, 'info': {}},
                                 {'episode_id': '1'})
    rewarder_client._manual_recv('v0.env.reward', {'reward': -3, 'done': False, 'info': {}},
                                 {'episode_id': '1'})
    observation, reward, done, info = env.step([spaces.KeyEvent.by_name('a', down=True)])
    assert sorted(observation.keys()) == ['text', 'vision']
    assert observation['text'] == []
    assert observation['vision'].shape == (768, 1024, 3)
    # The three reward messages are aggregated into one step: 10 + 15 - 3 == 22
    assert (reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (22, False, 'running', '1')
    assert info['stats.reward.count'] == 3
def test_monitor_filename():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp)
        env.close()

        manifests = glob.glob(os.path.join(temp, '*.manifest.*'))
        assert len(manifests) == 1
def test_video_callable_false_does_not_record():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()
        env.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 0
def test_steps_limit_restart_unused_when_not_wrapped():
    env = gym.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env.reset()
    for i in range(10):
        _, _, done, info = env.step([[]])
        assert done == [False]
def test_video_callable_records_videos():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp)
        env.reset()
        env.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 1, "Videos: {}".format(results['videos'])
def test_semisuper_succeeds():
    """Regression test: ensure a semisupervised env can be monitored and stepped."""
    with helpers.tempdir() as temp:
        env = gym.make('SemisuperPendulumDecay-v0')
        env = Monitor(env, temp)
        env.reset()
        env.step(env.action_space.sample())
        env.close()
def test_text_envs():
    env = gym.make('FrozenLake-v0')
    video = VideoRecorder(env)
    try:
        env.reset()
        video.capture_frame()
        video.close()
    finally:
        os.remove(video.path)
def main():
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity', default=0,
                        help='Set verbosity.')
    parser.add_argument('-o', '--output', required=True, help='Where to save trace.')
    parser.add_argument('-e', '--env-id', default='Pong-v3', help='Which env to run.')
    parser.add_argument('-s', '--vnc-address', default='127.0.0.1:5900',
                        help='Address of the VNC server to run on.')
    parser.add_argument('-r', '--rewarder-address', default='127.0.0.1:15900',
                        help='Address of the rewarder server to run on.')
    parser.add_argument('-S', '--seed', type=int, default=0, help='Set seed.')
    args = parser.parse_args()

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    observations = []
    vnc = args.env_id.startswith('VNC')
    env = gym.make(args.env_id)
    if args.seed is not None:
        env.seed(args.seed)
    if vnc:
        env.configure(vnc_address=args.vnc_address,
                      rewarder_address=args.rewarder_address)
        noop = []
    else:
        assert env.get_action_meanings()[0] == 'NOOP'
        noop = 0

    ob = env.reset()
    observations.append(ob)
    for i in range(100):
        ob, reward, done, info = env.step(noop)
        observations.append(ob)

    np.save(args.output, observations)
    return 0
def test_semisuper_true_rewards():
    env = gym.make('SemisuperPendulumNoise-v0')
    env.reset()
    observation, perceived_reward, done, info = env.step(env.action_space.sample())
    true_reward = info['true_reward']

    # The noise in the reward should ensure these are different. If we get
    # spurious errors, we can remove this check.
    assert perceived_reward != true_reward
def test_video_callable_true_not_allowed():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        try:
            env = Monitor(env, temp, video_callable=True)
        except error.Error:
            pass
        else:
            assert False, "Monitor should reject video_callable=True"
def test_joint():
    env1 = gym.make('test.DummyVNCEnv-v0')
    env2 = gym.make('test.DummyVNCEnv-v0')
    env1.configure(_n=3)
    env2.configure(_n=3)
    for reward_buffer in [env1._reward_buffers[0], env2._reward_buffers[0]]:
        reward_buffer.set_env_info('running', 'test.DummyVNCEnv-v0', '1', 60)
        reward_buffer.reset('1')
        reward_buffer.push('1', 10, False, {})

    env = wrappers.Joint([env1, env2])
    assert env.n == 6

    observation_n = env.reset()
    assert observation_n == [None] * 6

    observation_n, reward_n, done_n, info = env.step([[] for _ in range(env.n)])
    # Only index 0 of each joined env had a reward pushed, hence [10, 0, 0, 10, 0, 0]
    assert reward_n == [10.0, 0.0, 0.0, 10.0, 0.0, 0.0]
    assert done_n == [False] * 6
def test_record_simple():
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    with open(rec.path) as f:
        assert os.fstat(f.fileno()).st_size > 100
def test_write_upon_reset_false():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp, video_callable=False, write_upon_reset=False)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert not files, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0
def test_no_double_wrapping():
    temp = tempfile.mkdtemp()
    try:
        env = gym.make("FrozenLake-v0")
        env = wrappers.Monitor(env, temp)
        try:
            env = wrappers.Monitor(env, temp)
        except error.DoubleWrapperError:
            pass
        else:
            assert False, "Should not allow double wrapping"
        env.close()
    finally:
        shutil.rmtree(temp)
def test_steps_limit_restart():
    with helpers.tempdir() as temp:
        env = gym.make('test.StepsLimitCartpole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # Episode has started
        _, _, done, info = env.step(env.action_space.sample())
        assert done is False

        # Limit reached, now we get a done signal and the env resets itself
        _, _, done, info = env.step(env.action_space.sample())
        assert done is True
        assert env.episode_id == 1

        env.close()
def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[
            {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 5},
            {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 100},
        ])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(env, directory=temp, video_callable=False)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['data_sources'],
            results['initial_reset_timestamps'], results['episode_lengths'],
            results['episode_rewards'], results['episode_types'],
            results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.0009, 0.0054])), \
            "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), \
            "benchmark_score={}".format(benchmark_score)
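# Sanity check on the expected numbers above: the benchmark score is consistent
# with a plain average of the two per-task scores, (0.0009 + 0.0054) / 2 == 0.00315.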
def test_default_time_limit():
    # We need an env without a default limit
    register(
        id='test.NoLimitDummyVNCEnv-v0',
        entry_point='jiminy.envs:DummyVNCEnv',
        tags={
            'vnc': True,
        },
    )

    env = gym.make('test.NoLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()
    assert env._max_episode_seconds == wrappers.time_limit.DEFAULT_MAX_EPISODE_SECONDS
    assert env._max_episode_steps is None
def test_only_complete_episodes_written():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()
        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
        env.step(env.action_space.sample())

        env.close()

        # Only 1 episode should be written
        results = monitoring.load_results(temp)
        assert len(results['episode_lengths']) == 1, \
            "Found {} episodes written; expecting 1".format(len(results['episode_lengths']))
def test_steps_limit_restart():
    env = gym.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()
    assert env._max_episode_seconds is None
    assert env._max_episode_steps == 2

    # Episode has started
    _, _, done, info = env.step([[]])
    assert done == [False]

    # Limit reached, now we get a done signal and the env resets itself
    _, _, done, info = env.step([[]])
    assert done == [True]
    assert env._elapsed_steps == 0
def test_env_reuse():
    with helpers.tempdir() as temp:
        env = gym.make('Autoreset-v0')
        env = Monitor(env, temp)

        env.reset()

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        env.close()
def __init__(self, env, gym_core_id=None):
    super(GymCoreAction, self).__init__(env)

    if gym_core_id is None:
        # self.spec is None while inside of the make, so we need to pass
        # gym_core_id in explicitly there. This case will be hit when
        # instantiating by hand.
        gym_core_id = self.spec._kwargs['gym_core_id']

    spec = gym.spec(gym_core_id)
    raw_action_space = gym_core_action_space(gym_core_id)

    self._actions = raw_action_space.actions
    self.action_space = gym_spaces.Discrete(len(self._actions))

    if spec._entry_point.startswith('gym.envs.atari:'):
        self.key_state = translator.AtariKeyState(gym.make(gym_core_id))
    else:
        self.key_state = None
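# A hedged sketch of what the wrapper above enables: an agent picks a discrete
# index, which maps back to the raw VNC action list it stands for. Reading the
# private _actions list is shown for illustration only and is not a public API:
#
#     idx = wrapped_env.action_space.sample()  # e.g. 3
#     raw_action = wrapped_env._actions[idx]   # the corresponding raw VNC events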
def test_peek():
    env = gym.make('flashgames.DuskDrive-v0')
    env = wrappers.Unvectorize(env)
    env.configure(vnc_driver=FakeVNCSession, rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()

    rewarder_client = get_rewarder_client(env)
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60,
    }, {'episode_id': '1'})
    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})
    observation, reward, done, info = env.step([spaces.PeekReward])

    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60,
    }, {'episode_id': '2'})
    observation, reward, done, info = env.step([spaces.PeekReward])
    assert info['mask.masked.observation']
    assert info['mask.masked.action']
    assert info['env_status.episode_id'] == '1'
    assert info['env_status.env_state'] == 'resetting'
    assert info['env_status.peek.episode_id'] == '2'
    assert info['env_status.peek.env_state'] == 'resetting'

    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'running',
        'fps': 60,
    }, {'episode_id': '2'})
    observation, reward, done, info = env.step([spaces.PeekReward])
    assert not info.get('mask.masked.observation')
    assert not info.get('mask.masked.action')
    assert info['env_status.episode_id'] == '1'
    assert info['env_status.env_state'] == 'resetting'
    assert info['env_status.peek.episode_id'] == '2'
    assert info['env_status.peek.env_state'] == 'running'
def test_no_monitor_reset_unless_done():
    def assert_reset_raises(env):
        errored = False
        try:
            env.reset()
        except error.Error:
            errored = True
        assert errored, "Env allowed a reset when it shouldn't have"

    with helpers.tempdir() as temp:
        # Make sure we can reset as we please without monitor
        env = gym.make('CartPole-v0')
        env.reset()
        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        env.reset()

        # can reset once as soon as we start
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # can reset multiple times in a row
        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        assert_reset_raises(env)

        # should allow resets after the episode is done
        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())
        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        assert_reset_raises(env)

        env.close()
def load_demo(dir_name, env_name, read_text=False):
    """
    Load demonstrations from the given directory, filtering by env name.

    :param dir_name: directory containing recorded demonstration subdirectories
    :param env_name: environment id, used both to filter demos and to build the action mapper
    :param read_text: if True, also read text entries from rewards.demo
    :return: list of (obs, action) tuples
    """
    result = []
    env = gym.make(env_name)
    env = jiminy.wrappers.experimental.SoftmaxClickMouse(env)

    def mouse_to_action(pointer_event):
        return env._action_to_discrete(pointer_event)

    for demo_dir in iterate_demo_dirs(dir_name, env_name):
        client_header, client_messages = read_fbp_file(
            os.path.join(demo_dir, "client.fbs"), rfp_client.RfpClient,
            rfp_client.RfpClient.Header, rfp_client.RfpClient.Message)
        srv_header, srv_messages = read_fbp_file(
            os.path.join(demo_dir, "server.fbs"), rfp_server.RfpServer,
            rfp_server.RfpServer.Header, rfp_server.RfpServer.Message)
        if read_text:
            text_entries = read_text_entries(os.path.join(demo_dir, "rewards.demo"))
        else:
            text_entries = None
        samples = extract_samples(client_header, client_messages,
                                  srv_header, srv_messages,
                                  text_entries=text_entries,
                                  mouse_to_action=mouse_to_action)
        result.extend(samples)
    return result
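# A minimal usage sketch, mirroring how the training script below calls it.
# The directory path and env id here are illustrative, not from the original
# source:
#
#     demo_samples = load_demo("demos", "wob.mini.ClickDialog-v0")
#     print("Loaded %d (obs, action) pairs" % len(demo_samples))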
def test_boundary_simple():
    env = gym.make('flashgames.DuskDrive-v0')
    env = wrappers.Unvectorize(env)
    env.configure(vnc_driver=FakeVNCSession, rewarder_driver=FakeRewarder,
                  remotes='vnc://example.com:5900+15900')
    env.reset()

    rewarder_client = get_rewarder_client(env)
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60,
    }, {'episode_id': '1'})
    rewarder_client._manual_recv('v0.reply.env.reset', {}, {'episode_id': '1'})
    rewarder_client._manual_recv('v0.env.reward', {'reward': 1, 'done': False, 'info': {}},
                                 {'episode_id': '1'})
    rewarder_client._manual_recv('v0.env.reward', {'reward': 2, 'done': True, 'info': {}},
                                 {'episode_id': '1'})
    rewarder_client._manual_recv('v0.env.describe', {
        'env_id': 'flashgames.DuskDrive-v0',
        'env_state': 'resetting',
        'fps': 60,
    }, {'episode_id': '2'})

    # We have reward of 3 for episode 1, and episode 2 should now be resetting
    observation, reward, done, info = env.step([])
    assert info['mask.masked.observation']
    assert info['mask.masked.action']
    assert (reward, done, info['env_status.env_state'],
            info['env_status.episode_id']) == (3, True, 'resetting', '2')
parser.add_argument('-N', '--max-steps', type=int, default=10**7,
                    help='Maximum number of steps to take')
args = parser.parse_args()

logging.getLogger('gym').setLevel(logging.NOTSET)
logging.getLogger('jiminy').setLevel(logging.NOTSET)
if args.verbosity == 0:
    logger.setLevel(logging.INFO)
elif args.verbosity >= 1:
    logger.setLevel(logging.DEBUG)

if args.env_id is not None:
    env = gym.make(args.env_id)
else:
    env = wrappers.WrappedVNCEnv()
# env = wrappers.BlockingReset(env)
if not isinstance(env, wrappers.GymCoreAction):
    # The GymCoreSyncEnv's try to mimic their core counterparts, and thus
    # come pre-wrapped with an action space translator. Everything else
    # probably wants a SafeActionSpace wrapper to shield them from
    # random-agent clicking around everywhere.
    env = wrappers.SafeActionSpace(env)
else:
    # Only gym-core envs are seedable
    env.seed([0])
env = wrappers.Logger(env)
name = env_name.split('.')[-1] + "_" + args.name
writer = SummaryWriter(comment="-wob_click_" + name)
saves_path = os.path.join(SAVES_DIR, name)
os.makedirs(saves_path, exist_ok=True)

demo_samples = None
if args.demo:
    demo_samples = vnc_demo.load_demo(args.demo, env_name)
    if not demo_samples:
        demo_samples = None
        print("Demo not found")
    else:
        print("Loaded %d demo samples, will use them during training" % len(demo_samples))

env = gym.make(env_name)
env = jiminy.wrappers.experimental.SoftmaxClickMouse(env)
env = wob_vnc.MiniWoBCropper(env)
wob_vnc.configure(env, wob_vnc.remotes_url(port_ofs=args.port_ofs,
                                           hostname=args.host,
                                           count=REMOTES_COUNT))

net = model_vnc.Model(input_shape=wob_vnc.WOB_SHAPE,
                      n_actions=env.action_space.n).to(device)
print(net)
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)

agent = ptan.agent.PolicyAgent(lambda x: net(x)[0], device=device,
                               apply_softmax=True)
exp_source = ptan.experience.ExperienceSourceFirstLast(
    [env], agent, gamma=GAMMA, steps_count=REWARD_STEPS, vectorized=True)

best_reward = None
with common.RewardTracker(writer) as tracker:
    with ptan.common.utils.TBMeanTracker(writer, batch_size=10) as tb_tracker: