예제 #1
0
def test_write_upon_reset_false():
    """Check that ``reset()`` writes nothing when ``write_upon_reset`` is off.

    The monitor directory must still be empty right after ``reset()`` and
    must contain at least one file once the monitor has been closed.
    """
    with helpers.tempdir() as monitor_dir:
        everything = os.path.join(monitor_dir, '*')
        monitored = Monitor(
            gym.make('CartPole-v0'),
            directory=monitor_dir,
            video_callable=False,
            write_upon_reset=False,
        )
        monitored.reset()

        # Nothing may exist in the directory before the monitor is closed.
        after_reset = glob.glob(everything)
        assert not after_reset, "Files: {}".format(after_reset)

        # After close() the directory must no longer be empty.
        monitored.close()
        assert glob.glob(everything)
예제 #2
0
def test_write_upon_reset_true():
    """Check that ``reset()`` already writes files when ``write_upon_reset`` is on.

    The monitor directory must be non-empty both right after ``reset()`` and
    after the monitor has been closed.
    """
    with helpers.tempdir() as monitor_dir:
        everything = os.path.join(monitor_dir, '*')
        base_env = gym.make('CartPole-v0')

        # TODO: Fix Cartpole to not configure itself automatically; once that
        # is done, assert here that the fresh env is not configured yet
        # (``not env._configured``).
        monitored = Monitor(
            base_env,
            directory=monitor_dir,
            video_callable=False,
            write_upon_reset=True,
        )
        monitored.configure()
        monitored.reset()

        # Files are expected as soon as the first reset has happened.
        after_reset = glob.glob(everything)
        assert after_reset, "Files: {}".format(after_reset)

        # They must still be there after closing the monitor.
        monitored.close()
        assert glob.glob(everything)
예제 #3
0
def test_steps_limit_restart():
    """Check that the monitor tracks an episode ended by the step limit.

    Runs the ``test.StepsLimitCartpole-v0`` environment under a ``Monitor``:
    the first step must not finish the episode, the second step must report
    ``done``, and the monitor's ``episode_id`` must then equal 1.
    """
    with helpers.tempdir() as temp:
        env = gym.make('test.StepsLimitCartpole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # Episode has started
        _, _, done, _ = env.step(env.action_space.sample())
        assert not done

        # Limit reached, now we get a done signal and the env resets itself
        _, _, done, _ = env.step(env.action_space.sample())
        assert done
        assert env.episode_id == 1

        env.close()
예제 #4
0
def test():
    """Score a monitored CartPole run against a two-task benchmark.

    Performs one evaluation rollout, two training rollouts and a final
    evaluation rollout (with ``good=True``) under a ``Monitor``, loads the
    recorded results, and checks both the per-task evaluation scores and the
    aggregated benchmark score against fixed expected values.
    """
    cartpole_tasks = [
        {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 5},
        {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 100},
    ]
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=cartpole_tasks,
    )

    # Positional order in which the loaded results are handed to the scorer.
    result_keys = (
        'data_sources',
        'initial_reset_timestamps',
        'episode_lengths',
        'episode_rewards',
        'episode_types',
        'timestamps',
    )
    expected_scores = [0.00089999999999999998, 0.0054000000000000003]

    with helpers.tempdir() as monitor_dir:
        env = wrappers.Monitor(
            gym.make('CartPole-v0'),
            directory=monitor_dir,
            video_callable=False,
        )
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for _ in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        # Results are loaded only after the monitor has been closed.
        env.close()
        results = monitoring.load_results(monitor_dir)

        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', *[results[key] for key in result_keys])
        scores = evaluation_score['scores']
        benchmark_score = benchmark.score_benchmark({'CartPole-v0': scores})

        scores_match = np.all(np.isclose(scores, expected_scores))
        assert scores_match, "evaluation_score={}".format(evaluation_score)

        benchmark_matches = np.isclose(benchmark_score, 0.00315)
        assert benchmark_matches, "benchmark_score={}".format(benchmark_score)
예제 #5
0
def test_only_complete_episodes_written():
    """Check that only finished episodes end up in the monitor results.

    One episode is played until ``done``; a second one is started and the
    monitor is closed after a single step. The loaded results must contain
    exactly one episode length.
    """
    with helpers.tempdir() as monitor_dir:
        env = Monitor(gym.make('CartPole-v0'), monitor_dir, video_callable=False)

        # First episode: keep stepping until the env reports it is done.
        env.reset()
        while True:
            _, _, finished, _ = env.step(env.action_space.sample())
            if finished:
                break

        # Second episode: a single step, then close.
        env.reset()
        env.step(env.action_space.sample())

        env.close()

        # Only 1 episode should be written
        recorded = monitoring.load_results(monitor_dir)
        episode_count = len(recorded['episode_lengths'])
        assert episode_count == 1, "Found {} episodes written; expecting 1".format(episode_count)
예제 #6
0
def test_env_reuse():
    """Check that a monitored ``Autoreset-v0`` env can run consecutive episodes.

    After a single ``reset()``, two episodes are stepped back to back with no
    further explicit reset: in each, the first step must not be ``done`` and
    the second step must be.
    """
    with helpers.tempdir() as monitor_dir:
        env = Monitor(gym.make('Autoreset-v0'), monitor_dir)
        env.reset()

        # Same two-step pattern for both episodes.
        for _ in range(2):
            _, _, finished, _ = env.step(None)
            assert not finished
            _, _, finished, _ = env.step(None)
            assert finished

        env.close()
예제 #7
0
def test_no_monitor_reset_unless_done():
    """Check that a monitored env rejects ``reset()`` in the middle of an episode.

    An unmonitored env may be reset at any time. Under a ``Monitor``, repeated
    resets are allowed before any step and after an episode is done, while a
    reset after stepping into an unfinished episode must raise ``error.Error``.
    """
    def assert_reset_raises(env):
        # Fails the test unless env.reset() raises error.Error.
        try:
            env.reset()
        except error.Error:
            raised = True
        else:
            raised = False
        assert raised, "Env allowed a reset when it shouldn't have"

    with helpers.tempdir() as monitor_dir:
        # Make sure we can reset as we please without monitor
        env = gym.make('CartPole-v0')
        env.reset()
        for _ in range(2):
            env.step(env.action_space.sample())
        env.reset()

        # can reset once as soon as we start, and multiple times in a row
        env = Monitor(env, monitor_dir, video_callable=False)
        for _ in range(3):
            env.reset()

        # two steps into an episode, a reset must be refused
        for _ in range(2):
            env.step(env.action_space.sample())
        assert_reset_raises(env)

        # should allow resets after the episode is done
        while True:
            _, _, finished, _ = env.step(env.action_space.sample())
            if finished:
                break

        for _ in range(2):
            env.reset()

        # one step into the new episode, a reset must be refused again
        env.step(env.action_space.sample())
        assert_reset_raises(env)

        env.close()