Example #1
0
def test_renderable_after_monitor_close(spec):
    """Check that an env can be reset and rendered after its monitor closes.

    The test returns early (without checking anything) when:
      * the spec's entry point is a box2d env,
      * the entry point is a mujoco env and neither the
        ``MUJOCO_KEY_BUNDLE`` environment variable nor the ``~/.mujoco``
        path is present, or
      * the env's metadata does not list 'human' among its render modes.

    Args:
        spec: object exposing ``_entry_point`` (a string) and ``make()``.
    """
    # TODO(gdb 2016-05-15): Re-enable these tests after fixing box2d-py
    if spec._entry_point.startswith('gym.envs.box2d:'):
        logger.warn("Skipping tests for box2d env {}".format(
            spec._entry_point))
        return

    # Skip mujoco tests
    skip_mujoco = not (os.environ.get('MUJOCO_KEY_BUNDLE')
                       or os.path.exists(os.path.expanduser('~/.mujoco')))
    if skip_mujoco and spec._entry_point.startswith('gym.envs.mujoco:'):
        return

    with helpers.tempdir() as temp:
        env = spec.make()
        try:
            # Skip un-renderable envs
            if 'human' not in env.metadata.get('render.modes', []):
                return

            env.monitor.start(temp)
            env.reset()
            env.monitor.close()

            # The env must remain usable once monitoring has stopped.
            env.reset()
            env.render()
            env.render(close=True)
        finally:
            # Close on every path: previously the un-renderable skip and any
            # failure above left the freshly made env open.
            env.close()
Example #2
0
def test_benchmarks():
    """Smoke-test benchmark scoring for every env of two benchmarks.

    For each env id of each benchmark spec, the env is seeded, monitored
    through evaluation / training / evaluation rollouts, and the loaded
    results are passed to the benchmark's scoring methods. There are no
    explicit assertions; the test passes when nothing raises.
    """
    # Keys of the loaded results, in the positional order expected by
    # score_evaluation after the env id.
    score_fields = (
        'data_sources',
        'initial_reset_timestamps',
        'episode_lengths',
        'episode_rewards',
        'episode_types',
        'timestamps',
    )

    for benchmark_id in ('BernoulliBandit-v0', 'RandomTabularMDP-v0'):
        benchmark = registration.benchmark_spec(benchmark_id)

        for env_id in benchmark.env_ids:
            with helpers.tempdir() as workdir:
                env = gym.make(env_id)
                env.seed(0)
                env.monitor.start(workdir, video_callable=False)

                # One evaluation rollout, two training rollouts, then a
                # final evaluation rollout with good=True.
                env.monitor.configure(mode='evaluation')
                rollout(env)
                env.monitor.configure(mode='training')
                for _ in range(2):
                    rollout(env)
                env.monitor.configure(mode='evaluation')
                rollout(env, good=True)
                env.monitor.close()

                results = monitoring.load_results(workdir)
                score_args = [results[field] for field in score_fields]
                evaluation_score = benchmark.score_evaluation(
                    env_id, *score_args)
                benchmark.score_benchmark(
                    {env_id: evaluation_score['scores']})
Example #3
0
def test_renderable_after_monitor_close(spec):
    """Check that an env can be reset and rendered after its monitor closes.

    Returns early for box2d entry points, for mujoco entry points when
    neither ``MUJOCO_KEY_BUNDLE`` is set nor ``~/.mujoco`` exists, and for
    envs whose metadata does not list the 'human' render mode.

    Args:
        spec: object exposing ``_entry_point`` (a string) and ``make()``.
    """
    # TODO(gdb 2016-05-15): Re-enable these tests after fixing box2d-py
    if spec._entry_point.startswith('gym.envs.box2d:'):
        logger.warn("Skipping tests for box2d env {}".format(spec._entry_point))
        return

    # Skip mujoco tests
    skip_mujoco = not (os.environ.get('MUJOCO_KEY_BUNDLE') or os.path.exists(os.path.expanduser('~/.mujoco')))
    if skip_mujoco and spec._entry_point.startswith('gym.envs.mujoco:'):
        return

    with helpers.tempdir() as temp:
        env = spec.make()
        try:
            # Skip un-renderable envs
            if 'human' not in env.metadata.get('render.modes', []):
                return

            env.monitor.start(temp)
            env.reset()
            env.monitor.close()

            # Rendering must still work after the monitor has been closed.
            env.reset()
            env.render()
            env.render(close=True)
        finally:
            # Previously the env was only closed on the success path; the
            # early return and any failure above leaked it.
            env.close()
Example #4
0
def test_close_monitor():
    """Starting then closing a monitor must leave exactly one training manifest."""
    with helpers.tempdir() as workdir:
        fake_env = FakeEnv()
        fake_env.monitor.start(workdir)
        fake_env.monitor.close()

        found = monitor.detect_training_manifests(workdir)
        manifest_count = len(found)
        assert manifest_count == 1
Example #5
0
def test_video_callable_false_does_not_record():
    """With video_callable=False, a monitored reset must yield no videos."""
    with helpers.tempdir() as workdir:
        cartpole = gym.make('CartPole-v0')
        cartpole.monitor.start(workdir, video_callable=False)
        cartpole.reset()
        cartpole.monitor.close()

        recorded = monitoring.load_results(workdir)
        video_count = len(recorded['videos'])
        assert video_count == 0
Example #6
0
def test_video_callable_records_videos():
    """With the default video_callable, one monitored reset yields one video."""
    with helpers.tempdir() as workdir:
        cartpole = gym.make('CartPole-v0')
        cartpole.monitor.start(workdir)
        cartpole.reset()
        cartpole.monitor.close()

        recorded = monitoring.load_results(workdir)
        video_count = len(recorded['videos'])
        assert video_count == 1, "Videos: {}".format(recorded['videos'])
Example #7
0
def test_semisuper_succeeds():
    """Regression test: a monitored semisuper env can reset, step and close.

    There are no assertions; the test passes when none of the calls raise.
    """
    with helpers.tempdir() as workdir:
        base_env = gym.make('SemisuperPendulumDecay-v0')
        # Monitor is used here in its two-stage form: configure, then wrap.
        monitored = Monitor(workdir)(base_env)

        monitored.reset()
        sampled_action = monitored.action_space.sample()
        monitored.step(sampled_action)
        monitored.close()
Example #8
0
def test_monitor_filename():
    """A start/close cycle must write exactly one '*.manifest.*' file."""
    with helpers.tempdir() as workdir:
        cartpole = gym.make('CartPole-v0')
        cartpole.monitor.start(workdir)
        cartpole.monitor.close()

        manifest_pattern = os.path.join(workdir, '*.manifest.*')
        matches = glob.glob(manifest_pattern)
        assert len(matches) == 1
Example #9
0
def test_semisuper_succeeds():
    """Regression test: a Monitor-wrapped semisuper env can reset, step and close.

    There are no assertions; the test passes when none of the calls raise.
    """
    with helpers.tempdir() as workdir:
        monitored = Monitor(gym.make('SemisuperPendulumDecay-v0'), workdir)

        monitored.reset()
        sampled_action = monitored.action_space.sample()
        monitored.step(sampled_action)
        monitored.close()
Example #10
0
def test_video_callable():
    """Starting the Acrobot monitor with video_callable=False must raise error.Error."""
    with helpers.tempdir() as workdir:
        acrobot = gym.make('Acrobot-v0')

        rejected = False
        try:
            acrobot.monitor.start(workdir, video_callable=False)
        except error.Error:
            rejected = True

        # Fails when start() returned normally instead of raising.
        assert rejected
Example #11
0
def test_video_callable_true_not_allowed():
    """Starting the monitor with video_callable=True must raise error.Error."""
    with helpers.tempdir() as workdir:
        cartpole = gym.make('CartPole-v0')

        rejected = False
        try:
            cartpole.monitor.start(workdir, video_callable=True)
        except error.Error:
            rejected = True

        # Fails when start() accepted the literal True.
        assert rejected
Example #12
0
def test_video_callable():
    """Expect error.Error when the Acrobot monitor starts with video_callable=False."""
    with helpers.tempdir() as scratch:
        env = gym.make('Acrobot-v0')

        start_failed = False
        try:
            env.monitor.start(scratch, video_callable=False)
        except error.Error:
            start_failed = True

        # A clean return from start() is the failure case here.
        assert start_failed
Example #13
0
def test_write_upon_reset_false():
    """With write_upon_reset=False nothing is written on reset, only on close."""
    with helpers.tempdir() as workdir:
        everything = os.path.join(workdir, '*')

        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=workdir, video_callable=False, write_upon_reset=False)
        env.reset()

        # After the reset the directory must still be empty.
        written = glob.glob(everything)
        assert not written, "Files: {}".format(written)

        # Closing the wrapper must produce at least one file.
        env.close()
        written = glob.glob(everything)
        assert len(written) > 0
Example #14
0
def test_write_upon_reset_true():
    """With write_upon_reset=True files exist right after reset and after close."""
    with helpers.tempdir() as workdir:
        def list_written():
            # Everything currently present in the monitor directory.
            return glob.glob(os.path.join(workdir, '*'))

        env = gym.make('CartPole-v0')
        env.monitor.start(workdir, video_callable=False, write_upon_reset=True)
        env.reset()

        written = list_written()
        assert len(written) > 0, "Files: {}".format(written)

        env.monitor.close()
        written = list_written()
        assert len(written) > 0
Example #15
0
def test_env_reuse():
    """Restarting a monitor on the same directory records only the new episode."""
    with helpers.tempdir() as workdir:
        env = gym.make('CartPole-v0')

        # First session: started and closed without touching the env.
        env.monitor.start(workdir)
        env.monitor.close()

        # Second session on the same directory (force=True): reset + 2 steps.
        env.monitor.start(workdir, force=True)
        env.reset()
        for _ in range(2):
            env.step(env.action_space.sample())
        env.monitor.close()

        results = monitor.load_results(workdir)
        lengths = results['episode_lengths']
        assert lengths == [2], 'Results: {}'.format(results)
Example #16
0
def test_vnc_monitoring():
    """Monitor a GymCoreAction-wrapped Pong env and check the recorded env id."""
    env_id = 'gym-core.Pong-v3'

    with helpers.tempdir() as workdir:
        core_env = wrappers.GymCoreAction(gym.make(env_id))
        env = wrappers.Monitor(workdir)(core_env)

        env.configure(remotes=2)
        env.reset()
        # Two steps, each with one action per configured remote.
        for _ in range(2):
            env.step([0, 0])
        env.close()

        recorded = gym.monitoring.load_results(workdir)
        assert recorded['env_info']['env_id'] == env_id
Example #17
0
def test_multiprocessing_env_monitoring():
    """Monitor a 2-way multiprocessing Pong env; expect two video files and the env id."""
    env_id = 'Pong-v3'

    with helpers.tempdir() as workdir:
        multi_env = wrappers.WrappedMultiprocessingEnv(env_id)
        env = wrappers.Monitor(workdir)(multi_env)
        env.configure(n=2)
        env.reset()
        for _ in range(2):
            env.step([0, 0])
        env.close()

        # The glob matches '*.video.*' files; the message wording is kept
        # as in the original assertion.
        video_pattern = os.path.join(workdir, '*.video.*')
        matches = glob.glob(video_pattern)
        assert len(matches) == 2, 'There are {} manifests: {}'.format(len(matches), matches)

        recorded = gym.monitoring.load_results(workdir)
        assert recorded['env_info']['env_id'] == env_id
Example #18
0
def test_env_reuse():
    """Check that a monitored 'Autoreset-v0' env reports done on every second step.

    After a single reset, two pairs of steps are taken with no reset in
    between; the second step of each pair must return a truthy ``done``.
    """
    with helpers.tempdir() as temp:
        env = gym.make('Autoreset-v0')
        env.monitor.start(temp)
        try:
            env.reset()

            env.step(None)
            _, _, done, _ = env.step(None)
            assert done

            # No explicit reset here: the env is expected to carry on into
            # a second pair of steps by itself.
            env.step(None)
            _, _, done, _ = env.step(None)
            assert done
        finally:
            # The monitor was previously started but never closed; close it
            # on every path so it does not outlive the temp directory.
            env.monitor.close()
Example #19
0
def test_write_upon_reset_false():
    """With write_upon_reset=False files appear only once the wrapper is closed."""
    with helpers.tempdir() as workdir:
        def list_written():
            # Everything currently present in the monitor directory.
            return glob.glob(os.path.join(workdir, '*'))

        base_env = gym.make('CartPole-v0')
        wrap = Monitor(directory=workdir,
                       video_callable=False,
                       write_upon_reset=False)
        env = wrap(base_env)
        env.reset()

        written = list_written()
        assert not written, "Files: {}".format(written)

        env.close()
        written = list_written()
        assert len(written) > 0
Example #20
0
def test_multiprocessing_env_monitoring():
    """Monitor a 2-way multiprocessing Pong env; expect two video files and the env id."""
    env_id = 'Pong-v3'

    with helpers.tempdir() as workdir:
        env = wrappers.Monitor(wrappers.WrappedMultiprocessingEnv(env_id), workdir)
        env.configure(n=2)
        env.reset()
        for _ in range(2):
            env.step([0, 0])
        env.close()

        # The glob targets '*.video.*' files even though the assertion
        # message calls them manifests; the message is kept verbatim.
        matches = glob.glob(os.path.join(workdir, '*.video.*'))
        match_count = len(matches)
        assert match_count == 2, 'There are {} manifests: {}'.format(len(matches), matches)

        recorded = gym.monitoring.load_results(workdir)
        assert recorded['env_info']['env_id'] == env_id
Example #21
0
def test_vnc_monitoring():
    """Monitor a GymCoreAction-wrapped Pong env and check the recorded env id."""
    env_id = 'gym-core.Pong-v3'

    with helpers.tempdir() as workdir:
        core_env = wrappers.GymCoreAction(gym.make(env_id))
        env = wrappers.Monitor(core_env, workdir)

        env.configure(remotes=2)
        env.reset()
        # Two steps, each with one action per configured remote.
        for _ in range(2):
            env.step([0, 0])
        env.close()

        recorded = gym.monitoring.load_results(workdir)
        recorded_id = recorded['env_info']['env_id']
        assert recorded_id == env_id
Example #22
0
def test_steps_limit_restart():
    """Check that hitting the step limit signals done and advances the episode id.

    The first step after reset must not be done; the second must be done,
    and the Monitor wrapper's ``episode_id`` must then equal 1.
    """
    with helpers.tempdir() as temp:
        env = gym.make('test.StepsLimitCartpole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # Episode has started
        _, _, done, _ = env.step(env.action_space.sample())
        assert not done

        # Limit reached, now we get a done signal and the env resets itself
        _, _, done, _ = env.step(env.action_space.sample())
        assert done
        assert env.episode_id == 1

        env.close()
Example #23
0
def test_steps_limit_restart():
    """Check that hitting the step limit signals done and advances the episode id.

    The first step after reset must not be done; the second must be done,
    and the Monitor wrapper's ``episode_id`` must then equal 1.
    """
    with helpers.tempdir() as temp:
        env = gym.make('test.StepsLimitCartpole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # Episode has started
        _, _, done, _ = env.step(env.action_space.sample())
        assert not done

        # Limit reached, now we get a done signal and the env resets itself
        _, _, done, _ = env.step(env.action_space.sample())
        assert done
        assert env.episode_id == 1

        env.close()
Example #24
0
def test_write_upon_reset_true():
    """With write_upon_reset=True files exist right after reset and after close."""
    with helpers.tempdir() as workdir:
        everything = os.path.join(workdir, '*')
        base_env = gym.make('CartPole-v0')

        # TODO: Fix Cartpole to not configure itself automatically
        # assert not env._configured
        env = Monitor(base_env, directory=workdir, video_callable=False, write_upon_reset=True)
        env.configure()
        env.reset()

        written = glob.glob(everything)
        assert len(written) > 0, "Files: {}".format(written)

        env.close()
        written = glob.glob(everything)
        assert len(written) > 0
Example #25
0
def test():
    """Score monitored CartPole rollouts against a two-task benchmark.

    Builds a benchmark with two 'CartPole-v0' tasks (max_timesteps 5 and
    100), records evaluation / training / evaluation rollouts through a
    Monitor wrapper seeded with 0, and compares the resulting scores with
    fixed expected values.
    """
    # Two tasks that differ only in their timestep budget.
    tasks = [
        {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': limit}
        for limit in (5, 100)
    ]
    benchmark = registration.Benchmark(id='MyBenchmark-v0',
                                       scorer=scoring.ClipTo01ThenAverage(),
                                       tasks=tasks)

    # Result keys in the positional order score_evaluation takes them.
    score_fields = ('data_sources', 'initial_reset_timestamps',
                    'episode_lengths', 'episode_rewards', 'episode_types',
                    'timestamps')

    with helpers.tempdir() as workdir:
        base_env = gym.make('CartPole-v0')
        env = wrappers.Monitor(directory=workdir, video_callable=False)(base_env)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for _ in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(workdir)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', *[results[field] for field in score_fields])
        benchmark_score = benchmark.score_benchmark(
            {'CartPole-v0': evaluation_score['scores']})

        scores_match = np.isclose(
            evaluation_score['scores'],
            [0.00089999999999999998, 0.0054000000000000003])
        assert np.all(scores_match), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(
            benchmark_score,
            0.00315), "benchmark_score={}".format(benchmark_score)
Example #26
0
def test_only_complete_episodes_written():
    """An episode cut short by close() must not appear in the written results."""
    with helpers.tempdir() as workdir:
        env = Monitor(gym.make('CartPole-v0'), workdir, video_callable=False)

        # First episode: step with random actions until done.
        env.reset()
        while True:
            _, _, finished, _ = env.step(env.action_space.sample())
            if finished:
                break

        # Second episode: a single step, then close before it finishes.
        env.reset()
        env.step(env.action_space.sample())

        env.close()

        # Only 1 episode should be written
        results = monitoring.load_results(workdir)
        episode_count = len(results['episode_lengths'])
        assert episode_count == 1, "Found {} episodes written; expecting 1".format(len(results['episode_lengths']))
Example #27
0
def test():
    """Score monitored CartPole rollouts against a benchmark built from task_groups.

    Builds a benchmark whose 'CartPole-v0' group has two entries (timesteps
    5 and 100), records evaluation / training / evaluation rollouts through
    ``env.monitor`` started with seed=0, and compares the resulting scores
    with fixed expected values.
    """
    # Two task entries that differ only in their timestep budget.
    cartpole_tasks = [{'seeds': 1, 'timesteps': budget} for budget in (5, 100)]
    benchmark = registration.Benchmark(id='MyBenchmark-v0',
                                       scorer=scoring.ClipTo01ThenAverage(),
                                       task_groups={
                                           'CartPole-v0': cartpole_tasks,
                                       })

    # Result keys in the positional order score_evaluation takes them.
    score_fields = ('episode_lengths', 'episode_rewards', 'episode_types',
                    'timestamps', 'initial_reset_timestamp')

    with helpers.tempdir() as workdir:
        env = gym.make('CartPole-v0')
        env.monitor.start(workdir, video_callable=False, seed=0)

        env.monitor.configure(mode='evaluation')
        rollout(env)

        env.monitor.configure(mode='training')
        for _ in range(2):
            rollout(env)

        env.monitor.configure(mode='evaluation')
        rollout(env, good=True)

        env.monitor.close()
        results = monitoring.load_results(workdir)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', *[results[field] for field in score_fields])
        benchmark_score = benchmark.score_benchmark(
            {'CartPole-v0': evaluation_score['scores']})

        scores_match = np.isclose(
            evaluation_score['scores'],
            [0.00089999999999999998, 0.0054000000000000003])
        assert np.all(scores_match), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(
            benchmark_score,
            0.00315), "benchmark_score={}".format(benchmark_score)
Example #28
0
def test_env_reuse():
    """A Monitor-wrapped 'Autoreset-v0' env finishes on every second step.

    After one reset, two rounds of two steps are taken without any further
    reset; in each round the first step must not be done and the second
    must be.
    """
    with helpers.tempdir() as workdir:
        env = Monitor(gym.make('Autoreset-v0'), workdir)

        env.reset()

        for _ in range(2):
            _, _, done, _ = env.step(None)
            assert not done
            _, _, done, _ = env.step(None)
            assert done

        env.close()
Example #29
0
def test_env_reuse():
    """Step a Monitor-wrapped 'Autoreset-v0' env four times after a single reset.

    The done flag is expected to alternate: not done, done, not done, done.
    """
    with helpers.tempdir() as scratch:
        base_env = gym.make('Autoreset-v0')
        env = Monitor(base_env, scratch)

        env.reset()

        # Expected truthiness of `done` for each consecutive step.
        for should_finish in (False, True, False, True):
            _, _, done, _ = env.step(None)
            if should_finish:
                assert done
            else:
                assert not done

        env.close()
Example #30
0
def test_only_complete_episodes_written():
    """An episode cut short by monitor.close() must not appear in the results."""
    with helpers.tempdir() as workdir:
        env = gym.make('CartPole-v0')
        env.monitor.start(workdir, video_callable=False)

        # First episode: step with random actions until done.
        env.reset()
        while True:
            _, _, finished, _ = env.step(env.action_space.sample())
            if finished:
                break

        # Second episode: a single step, then close before it finishes.
        env.reset()
        env.step(env.action_space.sample())

        env.monitor.close()

        # Only 1 episode should be written
        results = monitoring.load_results(workdir)
        episode_count = len(results['episode_lengths'])
        assert episode_count == 1, "Found {} episodes written; expecting 1".format(len(results['episode_lengths']))
Example #31
0
def test_write_upon_reset_true():
    """With write_upon_reset=True files exist right after reset and after close."""
    with helpers.tempdir() as workdir:
        def list_written():
            # Everything currently present in the monitor directory.
            return glob.glob(os.path.join(workdir, '*'))

        base_env = gym.make('CartPole-v0')

        # TODO: Fix Cartpole to not configure itself automatically
        # assert not env._configured
        monitor_options = dict(directory=workdir,
                               video_callable=False,
                               write_upon_reset=True)
        env = Monitor(base_env, **monitor_options)
        env.configure()
        env.reset()

        written = list_written()
        assert len(written) > 0, "Files: {}".format(written)

        env.close()
        written = list_written()
        assert len(written) > 0
Example #32
0
def test_renderable_after_monitor_close(spec):
    """Check that an env can be reset and rendered after its monitor closes.

    Box2d entry points are skipped with a warning; envs whose metadata does
    not list the 'human' render mode are skipped silently.

    Args:
        spec: object exposing ``_entry_point`` (a string) and ``make()``.
    """
    # TODO(gdb 2016-05-15): Re-enable these tests after fixing box2d-py
    entry_point = spec._entry_point
    if entry_point.startswith('gym.envs.box2d:'):
        logger.warn("Skipping tests for box2d env {}".format(entry_point))
        return

    with helpers.tempdir() as workdir:
        env = spec.make()
        render_modes = env.metadata.get('render.modes', [])

        # Only renderable envs are exercised.
        if 'human' in render_modes:
            env.monitor.start(workdir)
            env.reset()
            env.monitor.close()

            env.reset()
            env.render()
            env.render(close=True)
Example #33
0
def test_renderable_after_monitor_close(spec):
    """Verify reset and render still work once monitoring has been closed.

    Returns early for box2d entry points (after logging a warning) and for
    envs that do not advertise the 'human' render mode.

    Args:
        spec: object exposing ``_entry_point`` (a string) and ``make()``.
    """
    # TODO(gdb 2016-05-15): Re-enable these tests after fixing box2d-py
    is_box2d = spec._entry_point.startswith('gym.envs.box2d:')
    if is_box2d:
        message = "Skipping tests for box2d env {}".format(spec._entry_point)
        logger.warn(message)
        return

    with helpers.tempdir() as scratch:
        env = spec.make()

        # Skip un-renderable envs
        supported_modes = env.metadata.get('render.modes', [])
        if 'human' not in supported_modes:
            return

        # Monitored phase: start, one reset, close.
        env.monitor.start(scratch)
        env.reset()
        env.monitor.close()

        # Unmonitored phase: the env must still reset and render.
        env.reset()
        env.render()
        env.render(close=True)
Example #34
0
def test_no_monitor_reset_unless_done():
    """While monitored, reset() mid-episode must raise error.Error.

    Resets are expected to succeed without a monitor, right after the
    monitor starts, repeatedly before any step, and after an episode ends.
    """
    def assert_reset_raises(env):
        # Passes only when env.reset() raises error.Error.
        try:
            env.reset()
        except error.Error:
            reset_rejected = True
        else:
            reset_rejected = False
        assert reset_rejected, "Env allowed a reset when it shouldn't have"

    def random_step(env):
        # Take one step with a randomly sampled action.
        return env.step(env.action_space.sample())

    with helpers.tempdir() as workdir:
        # Make sure we can reset as we please without monitor
        env = gym.make('CartPole-v0')
        env.reset()
        random_step(env)
        random_step(env)
        env.reset()

        # can reset once as soon as we start
        env.monitor.start(workdir, video_callable=False)
        env.reset()

        # can reset multiple times in a row
        env.reset()
        env.reset()

        random_step(env)
        random_step(env)
        assert_reset_raises(env)

        # should allow resets after the episode is done
        while True:
            _, _, finished, _ = random_step(env)
            if finished:
                break

        env.reset()
        env.reset()

        random_step(env)
        assert_reset_raises(env)

        env.monitor.close()
Example #35
0
def test():
    """Score monitored CartPole rollouts against a two-task benchmark.

    The benchmark has two 'CartPole-v0' tasks (max_timesteps 5 and 100).
    Rollouts are recorded through a Monitor wrapper seeded with 0 and the
    resulting scores are compared with fixed expected values.
    """
    short_task = {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 5}
    long_task = {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 100}
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[short_task, long_task])

    with helpers.tempdir() as workdir:
        env = wrappers.Monitor(gym.make('CartPole-v0'), directory=workdir, video_callable=False)
        env.seed(0)

        # evaluation rollout -> two training rollouts -> evaluation rollout
        env.set_monitor_mode('evaluation')
        rollout(env)
        env.set_monitor_mode('training')
        for _ in range(2):
            rollout(env)
        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(workdir)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0',
            results['data_sources'],
            results['initial_reset_timestamps'],
            results['episode_lengths'],
            results['episode_rewards'],
            results['episode_types'],
            results['timestamps'],
        )
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        scores_match = np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])
        assert np.all(scores_match), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
Example #36
0
def test():
    """Score monitored CartPole rollouts against a benchmark built from task_groups.

    The 'CartPole-v0' group has two entries (timesteps 5 and 100). Rollouts
    are recorded through ``env.monitor`` started with seed=0 and the
    resulting scores are compared with fixed expected values.
    """
    short_task = {'seeds': 1, 'timesteps': 5}
    long_task = {'seeds': 1, 'timesteps': 100}
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        task_groups={'CartPole-v0': [short_task, long_task]})

    with helpers.tempdir() as workdir:
        env = gym.make('CartPole-v0')
        env.monitor.start(workdir, video_callable=False, seed=0)

        # evaluation rollout -> two training rollouts -> evaluation rollout
        env.monitor.configure(mode='evaluation')
        rollout(env)
        env.monitor.configure(mode='training')
        for _ in range(2):
            rollout(env)
        env.monitor.configure(mode='evaluation')
        rollout(env, good=True)

        env.monitor.close()
        results = monitoring.load_results(workdir)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0',
            results['episode_lengths'],
            results['episode_rewards'],
            results['episode_types'],
            results['timestamps'],
            results['initial_reset_timestamp'],
        )
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        scores_match = np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])
        assert np.all(scores_match), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
Example #37
0
import gym
from gym.wrappers.monitoring import Monitor
from gym.monitoring.tests import helpers


# Demo script: run up to 20 episodes of CartPole with random actions while
# the env is wrapped in a Monitor bound to a temporary directory.
with helpers.tempdir() as temp:

    env = gym.make('CartPole-v0')
    # Wrap the env with the Monitor
    env = Monitor(temp)(env)
    try:
        for i_episode in range(20):
            observation = env.reset()
            # Each episode is capped at 500 steps.
            for t in range(500):
                env.render()
                action = env.action_space.sample()
                observation, reward, done, info = env.step(action)
                print(observation,reward,done,info)
                if done:
                    print("Episode finished after {} timesteps".format(t+1))
                    break
    finally:
        # Close the wrapped env even when an episode raises, while the
        # temporary directory still exists.
        env.close()