def test_renderable_after_monitor_close(spec):
    # TODO(gdb 2016-05-15): Re-enable these tests after fixing box2d-py
    if spec._entry_point.startswith('gym.envs.box2d:'):
        logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
        return

    # Skip mujoco tests
    skip_mujoco = not (os.environ.get('MUJOCO_KEY_BUNDLE') or os.path.exists(os.path.expanduser('~/.mujoco')))
    if skip_mujoco and spec._entry_point.startswith('gym.envs.mujoco:'):
        return

    with helpers.tempdir() as temp:
        env = spec.make()
        # Skip un-renderable envs
        if 'human' not in env.metadata.get('render.modes', []):
            return

        env.monitor.start(temp)
        env.reset()
        env.monitor.close()

        env.reset()
        env.render()
        env.render(close=True)

        env.close()

def test_benchmarks():
    for benchmark_id in ['BernoulliBandit-v0', 'RandomTabularMDP-v0']:
        benchmark = registration.benchmark_spec(benchmark_id)

        for env_id in benchmark.env_ids:
            with helpers.tempdir() as temp:
                env = gym.make(env_id)
                env.seed(0)
                env.monitor.start(temp, video_callable=False)

                env.monitor.configure(mode='evaluation')
                rollout(env)

                env.monitor.configure(mode='training')
                for i in range(2):
                    rollout(env)

                env.monitor.configure(mode='evaluation')
                rollout(env, good=True)

                env.monitor.close()
                results = monitoring.load_results(temp)
                evaluation_score = benchmark.score_evaluation(
                    env_id, results['data_sources'], results['initial_reset_timestamps'],
                    results['episode_lengths'], results['episode_rewards'],
                    results['episode_types'], results['timestamps'])
                benchmark.score_benchmark({
                    env_id: evaluation_score['scores'],
                })

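# The benchmark tests in this excerpt call a rollout() helper that is not
# shown here. A minimal sketch of what such a helper could look like (the
# good= flag and the alternating-action policy are assumptions for this
# sketch, not the exact upstream helper):
def rollout(env, good=False):
    env.reset()
    action = 0
    done = False
    while not done:
        if good:
            # Alternate 0/1 actions, which keeps CartPole balanced longer
            # than repeating a single action
            action = 1 - action
        _, _, done, _ = env.step(action)
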
def test_close_monitor():
    with helpers.tempdir() as temp:
        env = FakeEnv()
        env.monitor.start(temp)
        env.monitor.close()

        manifests = monitor.detect_training_manifests(temp)
        assert len(manifests) == 1

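# FakeEnv is not defined in this excerpt. A minimal stand-in could be a
# do-nothing gym.Env subclass; this sketch (including the era's _reset/_step
# override convention) is an assumption, not the upstream test fixture:
import gym
from gym import spaces

class FakeEnv(gym.Env):
    metadata = {'render.modes': []}
    action_space = spaces.Discrete(2)
    observation_space = spaces.Discrete(2)

    def _reset(self):
        # Trivial initial observation
        return 0

    def _step(self, action):
        # observation, reward, done, info
        return 0, 0.0, False, {}
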
def test_video_callable_false_does_not_record():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp, video_callable=False)
        env.reset()
        env.monitor.close()

        results = monitoring.load_results(temp)
        assert len(results['videos']) == 0

def test_video_callable_records_videos():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp)
        env.reset()
        env.monitor.close()

        results = monitoring.load_results(temp)
        assert len(results['videos']) == 1, "Videos: {}".format(results['videos'])

def test_semisuper_succeeds():
    """Regression test. Ensure that this can write."""
    with helpers.tempdir() as temp:
        env = gym.make('SemisuperPendulumDecay-v0')
        env = Monitor(temp)(env)
        env.reset()
        env.step(env.action_space.sample())
        env.close()

def test_monitor_filename():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp)
        env.monitor.close()

        manifests = glob.glob(os.path.join(temp, '*.manifest.*'))
        assert len(manifests) == 1

def test_semisuper_succeeds():
    """Regression test. Ensure that this can write."""
    with helpers.tempdir() as temp:
        env = gym.make('SemisuperPendulumDecay-v0')
        env = Monitor(env, temp)
        env.reset()
        env.step(env.action_space.sample())
        env.close()

def test_video_callable():
    with helpers.tempdir() as temp:
        env = gym.make('Acrobot-v0')
        try:
            env.monitor.start(temp, video_callable=False)
        except error.Error:
            pass
        else:
            assert False

def test_video_callable_true_not_allowed():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        try:
            env.monitor.start(temp, video_callable=True)
        except error.Error:
            pass
        else:
            assert False

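# In the monitor version exercised by the two tests above, bare booleans are
# rejected: video_callable is expected to be a function mapping an episode id
# to a bool (or left as the default). A sketch of recording every other
# episode, assuming that convention:
with helpers.tempdir() as temp:
    env = gym.make('CartPole-v0')
    env.monitor.start(temp, video_callable=lambda episode_id: episode_id % 2 == 0)
    env.reset()
    env.monitor.close()
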
def test_write_upon_reset_false():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp, video_callable=False, write_upon_reset=False)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert not files, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0

def test_write_upon_reset_true():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp, video_callable=False, write_upon_reset=True)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0, "Files: {}".format(files)

        env.monitor.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0

def test_env_reuse():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp)
        env.monitor.close()

        env.monitor.start(temp, force=True)
        env.reset()
        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        env.monitor.close()

        results = monitor.load_results(temp)
        assert results['episode_lengths'] == [2], 'Results: {}'.format(results)

def test_vnc_monitoring():
    with helpers.tempdir() as temp:
        env = gym.make('gym-core.Pong-v3')
        env = wrappers.GymCoreAction(env)
        env = wrappers.Monitor(temp)(env)
        env.configure(remotes=2)

        env.reset()
        for i in range(2):
            env.step([0, 0])
        env.close()

        results = gym.monitoring.load_results(temp)
        assert results['env_info']['env_id'] == 'gym-core.Pong-v3'

def test_multiprocessing_env_monitoring():
    with helpers.tempdir() as temp:
        env = wrappers.WrappedMultiprocessingEnv('Pong-v3')
        env = wrappers.Monitor(temp)(env)
        env.configure(n=2)

        env.reset()
        for i in range(2):
            env.step([0, 0])
        env.close()

        manifests = glob.glob(os.path.join(temp, '*.video.*'))
        assert len(manifests) == 2, 'There are {} manifests: {}'.format(len(manifests), manifests)

        results = gym.monitoring.load_results(temp)
        assert results['env_info']['env_id'] == 'Pong-v3'

def test_env_reuse():
    with helpers.tempdir() as temp:
        env = gym.make('Autoreset-v0')
        env.monitor.start(temp)

        env.reset()

        env.step(None)
        _, _, done, _ = env.step(None)
        assert done

        env.step(None)
        _, _, done, _ = env.step(None)
        assert done

def test_write_upon_reset_false():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(directory=temp, video_callable=False, write_upon_reset=False)(env)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert not files, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0

def test_multiprocessing_env_monitoring():
    with helpers.tempdir() as temp:
        env = wrappers.WrappedMultiprocessingEnv('Pong-v3')
        env = wrappers.Monitor(env, temp)
        env.configure(n=2)

        env.reset()
        for i in range(2):
            env.step([0, 0])
        env.close()

        manifests = glob.glob(os.path.join(temp, '*.video.*'))
        assert len(manifests) == 2, 'There are {} manifests: {}'.format(len(manifests), manifests)

        results = gym.monitoring.load_results(temp)
        assert results['env_info']['env_id'] == 'Pong-v3'

def test_vnc_monitoring():
    with helpers.tempdir() as temp:
        env = gym.make('gym-core.Pong-v3')
        env = wrappers.GymCoreAction(env)
        env = wrappers.Monitor(env, temp)
        env.configure(remotes=2)

        env.reset()
        for i in range(2):
            env.step([0, 0])
        env.close()

        results = gym.monitoring.load_results(temp)
        assert results['env_info']['env_id'] == 'gym-core.Pong-v3'

def test_steps_limit_restart():
    with helpers.tempdir() as temp:
        env = gym.make('test.StepsLimitCartpole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # Episode has started
        _, _, done, info = env.step(env.action_space.sample())
        assert not done

        # Limit reached, now we get a done signal and the env resets itself
        _, _, done, info = env.step(env.action_space.sample())
        assert done
        assert env.episode_id == 1

        env.close()

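# 'test.StepsLimitCartpole-v0' is a test-only registration that is not part
# of this excerpt. A sketch of how such an env could be registered with a
# two-step limit; the max_episode_steps kwarg is an assumption here, and
# older gym versions spelled this limit differently:
from gym.envs import registration

registration.register(
    id='test.StepsLimitCartpole-v0',
    entry_point='gym.envs.classic_control:CartPoleEnv',
    max_episode_steps=2,
)
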
def test_write_upon_reset_true():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        # TODO: Fix CartPole to not configure itself automatically
        # assert not env._configured
        env = Monitor(env, directory=temp, video_callable=False, write_upon_reset=True)
        env.configure()
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0

def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[
            {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 5},
            {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 100},
        ])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(directory=temp, video_callable=False)(env)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['data_sources'], results['initial_reset_timestamps'],
            results['episode_lengths'], results['episode_rewards'],
            results['episode_types'], results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.0009, 0.0054])), \
            "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)

def test_only_complete_episodes_written():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()
        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
        env.step(env.action_space.sample())

        env.close()

        # Only 1 episode should be written
        results = monitoring.load_results(temp)
        assert len(results['episode_lengths']) == 1, \
            "Found {} episodes written; expecting 1".format(len(results['episode_lengths']))

def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        task_groups={
            'CartPole-v0': [
                {'seeds': 1, 'timesteps': 5},
                {'seeds': 1, 'timesteps': 100},
            ],
        })

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp, video_callable=False, seed=0)

        env.monitor.configure(mode='evaluation')
        rollout(env)

        env.monitor.configure(mode='training')
        for i in range(2):
            rollout(env)

        env.monitor.configure(mode='evaluation')
        rollout(env, good=True)

        env.monitor.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['episode_lengths'], results['episode_rewards'],
            results['episode_types'], results['timestamps'], results['initial_reset_timestamp'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.0009, 0.0054])), \
            "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)

def test_env_reuse():
    with helpers.tempdir() as temp:
        env = gym.make('Autoreset-v0')
        env = Monitor(env, temp)

        env.reset()

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        env.close()

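# 'Autoreset-v0' is a test fixture whose episodes last exactly two steps and
# which starts a new episode by itself once an episode ends, so step() keeps
# working without an explicit reset(). A sketch consistent with the asserts
# above (an assumption, not the upstream fixture):
import gym

class AutoresetEnv(gym.Env):
    metadata = {'render.modes': []}

    def _reset(self):
        self.steps = 0
        return 0

    def _step(self, action):
        self.steps += 1
        done = self.steps >= 2
        if done:
            # Auto-reset so the next step() begins a fresh episode
            self._reset()
        return 0, 0.0, done, {}
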
def test_only_complete_episodes_written():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp, video_callable=False)
        env.reset()
        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
        env.step(env.action_space.sample())

        env.monitor.close()

        # Only 1 episode should be written
        results = monitoring.load_results(temp)
        assert len(results['episode_lengths']) == 1, \
            "Found {} episodes written; expecting 1".format(len(results['episode_lengths']))

def test_renderable_after_monitor_close(spec):
    # TODO(gdb 2016-05-15): Re-enable these tests after fixing box2d-py
    if spec._entry_point.startswith('gym.envs.box2d:'):
        logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
        return

    with helpers.tempdir() as temp:
        env = spec.make()
        # Skip un-renderable envs
        if 'human' not in env.metadata.get('render.modes', []):
            return

        env.monitor.start(temp)
        env.reset()
        env.monitor.close()

        env.reset()
        env.render()
        env.render(close=True)

def test_no_monitor_reset_unless_done():
    def assert_reset_raises(env):
        errored = False
        try:
            env.reset()
        except error.Error:
            errored = True
        assert errored, "Env allowed a reset when it shouldn't have"

    with helpers.tempdir() as temp:
        # Make sure we can reset as we please without a monitor
        env = gym.make('CartPole-v0')
        env.reset()
        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        env.reset()

        # Can reset once as soon as we start
        env.monitor.start(temp, video_callable=False)
        env.reset()

        # Can reset multiple times in a row
        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        assert_reset_raises(env)

        # Should allow resets after the episode is done
        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        assert_reset_raises(env)

        env.monitor.close()

def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[
            {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 5},
            {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 100},
        ])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(env, directory=temp, video_callable=False)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['data_sources'], results['initial_reset_timestamps'],
            results['episode_lengths'], results['episode_rewards'],
            results['episode_types'], results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.0009, 0.0054])), \
            "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)

import gym
from gym.wrappers.monitoring import Monitor
from gym.monitoring.tests import helpers

with helpers.tempdir() as temp:
    env = gym.make('CartPole-v0')

    # Wrap the env with a monitor
    env = Monitor(temp)(env)
    # env.monitor.start(temp)
    # env.monitor.start('/tmp/cartpole-experiment-1')

    for i_episode in range(20):
        observation = env.reset()
        for t in range(500):
            env.render()
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            print(observation, reward, done, info)
            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                break

    env.close()
    # env.monitor.close()