def test_clip_average_max_seconds():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_seconds': 1,
            },
        ])

    benchmark_result = _benchmark_result_helper(benchmark,
                                                data_sources=[0, 0],
                                                episode_lengths=[100, 100],
                                                episode_rewards=[0, 100],
                                                episode_types=['t', 't'],
                                                timestamps=[1.5, 2])
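    # with rewards of 0 and 100, the clipped per-episode scores (0.0 and 1.0)
    # average to the expected 0.5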
    _assert_benchmark_result(benchmark_result, score=0.5)

    # make sure we only include the first result because of wall clock time
    benchmark_result = _benchmark_result_helper(
        benchmark,
        data_sources=[0, 0, 0],
        episode_lengths=[100, 100, 100],
        episode_rewards=[0, 100, 100],
        episode_types=['t', 't', 't'],
        timestamps=[2, 102, 202])
    _assert_benchmark_result(benchmark_result, score=0.0)
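
# `_benchmark_result_helper` is not part of this excerpt. A minimal sketch,
# assuming it fills in default episode data for a single short episode and
# forwards everything to `benchmark.score_evaluation` (the keyword names
# mirror the positional arguments used in test() below):
def _benchmark_result_helper(benchmark, **kwargs):
    defaults = dict(
        env_id='CartPole-v0',
        data_sources=[0],
        initial_reset_timestamps=[1],
        episode_lengths=[1],
        episode_rewards=[1],
        episode_types=['t'],
        timestamps=[2],
    )
    for key, value in defaults.items():
        kwargs.setdefault(key, value)
    return benchmark.score_evaluation(**kwargs)
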
def test():
    benchmark = registration.Benchmark(id='MyBenchmark-v0',
                                       scorer=scoring.ClipTo01ThenAverage(),
                                       tasks=[{
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 5
                                       }, {
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 100,
                                       }])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(env, directory=temp, video_callable=False)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['data_sources'],
            results['initial_reset_timestamps'], results['episode_lengths'],
            results['episode_rewards'], results['episode_types'],
            results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        # the benchmark score is consistent with averaging the two per-task
        # evaluation scores: (0.0009 + 0.0054) / 2 = 0.00315
        assert np.all(np.isclose(evaluation_score['scores'], [0.0009, 0.0054])), \
            "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), \
            "benchmark_score={}".format(benchmark_score)
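
# `rollout` is also not included in this excerpt. A minimal sketch, assuming it
# plays a single episode in the monitored env and that `good=True` means taking
# alternating actions so the pole survives longer (and scores higher):
def rollout(env, good=False):
    env.reset()
    action = 0
    done = False
    while not done:
        if good:
            action = 1 - action  # alternate left/right pushes
        _obs, _reward, done, _info = env.step(action)
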
def test_clip_average_evaluation_not_enough_rewards():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 5,
            },
        ])
    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
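    # num_episodes=2 but only one episode is recorded, so the scorer is
    # expected to pad the missing episode with zero reward and zero length
    # (hence the [1, 0] arrays asserted below)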
    _assert_evaluation_result(
        benchmark_result,
        score=0.005,
        rewards=[np.array([1, 0])],
        lengths=[np.array([1, 0])],
    )
def test_clip_average_evaluation_scoring():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=1),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 5,
            },
        ])
    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
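    # the default helper episode earns a reward of 1; the expected score of
    # 0.01 suggests ClipTo01ThenAverage rescales CartPole rewards against a
    # 0-100 range before averaging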
    _assert_benchmark_result(benchmark_result, score=0.01)

    # test a successful run: rewards of 100 give clipped scores of 1.0 and
    # mark the task as solved
    benchmark_result = _benchmark_result_helper(benchmark,
                                                episode_rewards=[100, 100],
                                                episode_lengths=[1, 1])
    _assert_benchmark_result(benchmark_result, score=1.0, solves=True)
import numpy as np
from collections import defaultdict
from rand_param_envs.gym.benchmarks import registration, scoring
# wrappers, monitoring and the tempdir test helper are needed by test() above;
# these import paths assume the vendored gym keeps the upstream gym layout
from rand_param_envs.gym import monitoring, wrappers
from rand_param_envs.gym.monitoring.tests import helpers

from rand_param_envs import gym

gym.undo_logger_setup()

benchmark = registration.Benchmark(id='TestBenchmark-v0',
                                   scorer=scoring.ClipTo01ThenAverage(),
                                   tasks=[
                                       {
                                           'env_id': 'CartPole-v0',
                                           'trials': 1,
                                           'max_timesteps': 100,
                                       },
                                       {
                                           'env_id': 'Pendulum-v0',
                                           'trials': 1,
                                           'max_timesteps': 100,
                                       },
                                   ])


def _is_close(x, target):
    return np.all(np.isclose(x, target))


def _eq_list_of_arrays(x, y):
    return np.all([len(a) == len(b) and np.all(a == b) for a, b in zip(x, y)])
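

# The assertion helpers used by the tests above are not part of this excerpt.
# A minimal sketch of what they check, assuming each keyword argument is
# compared against the corresponding field of the score_evaluation result
# (the 'rewards', 'lengths' and 'solves' keys are assumptions; 'scores' is
# used directly in test() above):
def _assert_evaluation_result(result, score=None, rewards=None, lengths=None):
    debug_str = "result={}".format(result)
    if score is not None:
        assert _is_close(result['scores'][0], score), debug_str
    if rewards is not None:
        assert _eq_list_of_arrays(result['rewards'], rewards), debug_str
    if lengths is not None:
        assert _eq_list_of_arrays(result['lengths'], lengths), debug_str


def _assert_benchmark_result(result, score=None, solves=False):
    debug_str = "result={}".format(result)
    if score is not None:
        assert _is_close(result['scores'][0], score), debug_str
    if solves:
        assert np.all(result['solves']), debug_str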