Example #1
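# ClipTo01ThenAverage with a max_timesteps cutoff: episodes recorded after the
# timestep budget is spent should not count toward the score.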
def test_clip_average_max_timesteps():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 2,
            },
        ])

    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0],
                                                episode_lengths=[1, 1],
                                                episode_rewards=[1, 1],
                                                episode_types=['t', 't'],
                                                timestamps=[2, 3])
    _assert_benchmark_result(benchmark_result, score=0.01)

    # make sure we only include the first result because of timesteps
    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0, 0],
                                                episode_lengths=[1, 100, 100],
                                                episode_rewards=[1, 100, 100],
                                                episode_types=['t', 't', 't'],
                                                timestamps=[2, 102, 202])
    _assert_benchmark_result(benchmark_result, score=0.005, solves=False)
Example #2
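# ClipTo01ThenAverage with a max_seconds cutoff: episodes finishing after the
# wall-clock limit are excluded from the score.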
def test_max_seconds():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_seconds': 1,
            },
        ])

    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0],
                                                episode_lengths=[100, 100],
                                                episode_rewards=[0, 100],
                                                episode_types=['t', 't'],
                                                timestamps=[1.5, 2])
    assert _is_close(benchmark_result['scores'][0],
                     0.5), "benchmark_result={}".format(benchmark_result)

    # make sure we only include the first result because of wall clock time
    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0],
                                                episode_lengths=[100, 100],
                                                episode_rewards=[0, 100],
                                                episode_types=['t', 't'],
                                                timestamps=[2, 100])
    assert _is_close(benchmark_result['scores'][0],
                     0.0), "benchmark_result={}".format(benchmark_result)
Example #3
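# TotalReward scorer with a max_seconds cutoff: only episodes completed within
# the wall-clock limit are scored.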
def test_total_reward_max_seconds():
    benchmark = registration.Benchmark(id='TestBenchmark-v0',
                                       scorer=scoring.TotalReward(),
                                       tasks=[
                                           {
                                               'env_id': 'CartPole-v0',
                                               'trials': 1,
                                               'max_seconds': 1,
                                           },
                                       ])

    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0],
                                                episode_lengths=[100, 100],
                                                episode_rewards=[0, 100],
                                                episode_types=['t', 't'],
                                                timestamps=[1.5, 2])
    _assert_benchmark_result(benchmark_result, score=0.5)

    # make sure we only include the first result because of wall clock time
    benchmark_result = _benchmark_result_helper(
        benchmark,
        data_sources=[0, 0, 0],
        episode_lengths=[100, 100, 100],
        episode_rewards=[0, 100, 100],
        episode_types=['t', 't', 't'],
        timestamps=[2, 102, 202])
    _assert_benchmark_result(benchmark_result, score=0.0)
Example #4
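# ClipTo01ThenAverage with a max_timesteps cutoff: the over-budget episode is
# dropped from scoring and the task is not marked as solved.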
def test_max_timesteps():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 2,
            },
        ])

    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0],
                                                episode_lengths=[1, 1],
                                                episode_rewards=[1, 1],
                                                episode_types=['t', 't'],
                                                timestamps=[2, 3])
    assert _is_close(benchmark_result['scores'][0],
                     0.01), "benchmark_result={}".format(benchmark_result)

    # make sure we only include the first result because of timesteps
    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0],
                                                episode_lengths=[1, 100],
                                                episode_rewards=[1, 100],
                                                episode_types=['t', 't'],
                                                timestamps=[2, 102])
    assert _is_close(benchmark_result['scores'][0],
                     0.005), "benchmark_result={}".format(benchmark_result)
    assert not np.any(benchmark_result['solves']
                      [0]), "benchmark_result={}".format(benchmark_result)
Example #5
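# End-to-end check: record monitored CartPole rollouts, load the results, and
# score them against a two-task benchmark with ClipTo01ThenAverage.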
def test():
    benchmark = registration.Benchmark(id='MyBenchmark-v0',
                                       scorer=scoring.ClipTo01ThenAverage(),
                                       tasks=[{
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 5
                                       }, {
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 100,
                                       }])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(directory=temp, video_callable=False)(env)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['data_sources'],
            results['initial_reset_timestamps'], results['episode_lengths'],
            results['episode_rewards'], results['episode_types'],
            results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0':
            evaluation_score['scores'],
        })

        assert np.all(
            np.isclose(evaluation_score['scores'],
                       [0.00089999999999999998, 0.0054000000000000003
                        ])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(
            benchmark_score,
            0.00315), "benchmark_score={}".format(benchmark_score)
Example #6
def test():
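    # Legacy-monitor variant of the previous end-to-end test, written against
    # the older env.monitor.start/configure API and a task_groups-based Benchmark.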
    benchmark = registration.Benchmark(id='MyBenchmark-v0',
                                       scorer=scoring.ClipTo01ThenAverage(),
                                       task_groups={
                                           'CartPole-v0': [{
                                               'seeds': 1,
                                               'timesteps': 5
                                           }, {
                                               'seeds': 1,
                                               'timesteps': 100
                                           }],
                                       })

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env.monitor.start(temp, video_callable=False, seed=0)

        env.monitor.configure(mode='evaluation')
        rollout(env)

        env.monitor.configure(mode='training')
        for i in range(2):
            rollout(env)

        env.monitor.configure(mode='evaluation')
        rollout(env, good=True)

        env.monitor.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['episode_lengths'],
            results['episode_rewards'], results['episode_types'],
            results['timestamps'], results['initial_reset_timestamp'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0':
            evaluation_score['scores'],
        })

        assert np.all(
            np.isclose(evaluation_score['scores'],
                       [0.00089999999999999998, 0.0054000000000000003
                        ])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(
            benchmark_score,
            0.00315), "benchmark_score={}".format(benchmark_score)
Example #7
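# TotalReward scorer: a baseline run earns a small score, while a high-reward
# run scores 1.0 and is marked as solved.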
def test_total_reward_clip_scoring():
    benchmark = registration.Benchmark(id='TestBenchmark-v0',
                                       scorer=scoring.TotalReward(),
                                       tasks=[
                                           {
                                               'env_id': 'CartPole-v0',
                                               'trials': 1,
                                               'max_timesteps': 5,
                                           },
                                       ])
    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
    _assert_benchmark_result(benchmark_result, score=0.01)

    # test a successful run
    benchmark_result = _benchmark_result_helper(benchmark,
                                                episode_rewards=[100])
    _assert_benchmark_result(benchmark_result, score=1.0, solves=True)
Example #8
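# ClipTo01ThenAverage over a single evaluation episode: a baseline run scores
# 0.01, and a high-reward run scores 1.0 and solves the task.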
def test_clip_average_evaluation_scoring():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=1),
        tasks=[
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 5,
            },
        ]
    )
    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
    _assert_benchmark_result(benchmark_result, score=0.01)

    # test a successful run
    benchmark_result = _benchmark_result_helper(benchmark, episode_rewards=[100, 100], episode_lengths=[1, 1])
    _assert_benchmark_result(benchmark_result, score=1.0, solves=True)
Example #9
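# With fewer recorded episodes than num_episodes, ClipTo01ThenAverage pads the
# missing rewards and lengths with zeros.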
def test_clip_average_evaluation_not_enough_rewards():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 5,
            },
        ])
    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
    _assert_evaluation_result(
        benchmark_result,
        score=0.005,
        rewards=[np.array([1, 0])],
        lengths=[np.array([1, 0])],
    )
Example #10
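# ClipTo01ThenAverage with num_episodes=1: a baseline run scores 0.01, and a
# 100-reward run scores 1.0 and solves the task.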
def test_clip_scoring():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=1),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 5,
            },
        ])
    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
    assert _is_close(benchmark_result['scores'][0],
                     0.01), "benchmark_result={}".format(benchmark_result)

    # test a successful run
    benchmark_result = _benchmark_result_helper(benchmark,
                                                episode_rewards=[100])
    assert _is_close(benchmark_result['scores'][0],
                     1.0), "benchmark_result={}".format(benchmark_result)
    assert np.all(benchmark_result['solves'][0]), "benchmark_result={}".format(
        benchmark_result)
Example #11
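# Module-level test fixtures: a two-task benchmark plus the helper functions
# used by the assertions in the tests above.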
import numpy as np
from collections import defaultdict
from gym.benchmarks import registration, scoring

import gym
gym.undo_logger_setup()

benchmark = registration.Benchmark(id='TestBenchmark-v0',
                                   scorer=scoring.ClipTo01ThenAverage(),
                                   tasks=[
                                       {
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 100,
                                       },
                                       {
                                           'env_id': 'Pendulum-v0',
                                           'trials': 1,
                                           'max_timesteps': 100,
                                       },
                                   ])


def _is_close(x, target):
    return np.all(np.isclose(x, target))

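# The tests above also call `_benchmark_result_helper`, whose definition falls
# outside these snippets. A minimal sketch of what it plausibly looks like,
# assuming it just fills in single-episode defaults and forwards everything to
# `benchmark.score_evaluation` (hypothetical, for illustration only):
def _benchmark_result_helper(benchmark, **kwargs):
    defaults = dict(
        env_id='CartPole-v0',
        data_sources=[0],
        initial_reset_timestamps=[1],
        episode_lengths=[1],
        episode_rewards=[1],
        episode_types=['t'],
        timestamps=[2],
    )
    # Keyword arguments supplied by a test override these defaults.
    for key, value in defaults.items():
        kwargs.setdefault(key, value)
    return benchmark.score_evaluation(**kwargs)
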

def _assert_benchmark_result(result,
                             score=None,
                             solves=None,
                             summed_training_seconds=None,