def test_clip_average_max_seconds():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_seconds': 1,
            },
        ])

    benchmark_result = _benchmark_result_helper(
        benchmark,
        data_sources=[0, 0],
        episode_lengths=[100, 100],
        episode_rewards=[0, 100],
        episode_types=['t', 't'],
        timestamps=[1.5, 2])
    _assert_benchmark_result(benchmark_result, score=0.5)

    # make sure we only include the first result because of wall clock time
    benchmark_result = _benchmark_result_helper(
        benchmark,
        data_sources=[0, 0, 0],
        episode_lengths=[100, 100, 100],
        episode_rewards=[0, 100, 100],
        episode_types=['t', 't', 't'],
        timestamps=[2, 102, 202])
    _assert_benchmark_result(benchmark_result, score=0.0)


def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 5,
            },
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 100,
            },
        ])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(env, directory=temp, video_callable=False)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()

        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0',
            results['data_sources'],
            results['initial_reset_timestamps'],
            results['episode_lengths'],
            results['episode_rewards'],
            results['episode_types'],
            results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'],
                                 [0.00089999999999999998, 0.0054000000000000003])), \
            "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), \
            "benchmark_score={}".format(benchmark_score)


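# NOTE: `rollout` is used by test() above but is not defined in this excerpt.
# The sketch below is an assumption inferred from the call sites, not the
# module's verbatim helper: it plays one episode to completion, and `good=True`
# alternates the CartPole actions so the episode lasts longer and scores higher.
def rollout(env, good=False):
    env.reset()

    action = 0
    done = False
    while not done:
        if good:
            # alternate between pushing left and right to keep the pole up
            action = 1 - action
        _observation, _reward, done, _info = env.step(action)

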
def test_clip_average_evaluation_not_enough_rewards():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=2),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 5,
            },
        ])

    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
    _assert_evaluation_result(
        benchmark_result,
        score=0.005,
        rewards=[np.array([1, 0])],
        lengths=[np.array([1, 0])],
    )


def test_clip_average_evaluation_scoring():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(num_episodes=1),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 5,
            },
        ])

    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
    _assert_benchmark_result(benchmark_result, score=0.01)

    # test a successful run
    benchmark_result = _benchmark_result_helper(
        benchmark, episode_rewards=[100, 100], episode_lengths=[1, 1])
    _assert_benchmark_result(benchmark_result, score=1.0, solves=True)


from collections import defaultdict

import numpy as np

from environments.mujoco.rand_param_envs import gym
from environments.mujoco.rand_param_envs.gym.benchmarks import registration, scoring
# test() above also needs the monitoring machinery and the tempdir test helper;
# these imports assume the vendored gym copy ships those modules.
from environments.mujoco.rand_param_envs.gym import monitoring, wrappers
from environments.mujoco.rand_param_envs.gym.monitoring.tests import helpers

gym.undo_logger_setup()

benchmark = registration.Benchmark(
    id='TestBenchmark-v0',
    scorer=scoring.ClipTo01ThenAverage(),
    tasks=[
        {
            'env_id': 'CartPole-v0',
            'trials': 1,
            'max_timesteps': 100,
        },
        {
            'env_id': 'Pendulum-v0',
            'trials': 1,
            'max_timesteps': 100,
        },
    ])


def _is_close(x, target):
    return np.all(np.isclose(x, target))


def _eq_list_of_arrays(x, y):
    return np.all([len(a) == len(b) and np.all(a == b) for a, b in zip(x, y)])


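# NOTE: the tests above also rely on _benchmark_result_helper,
# _assert_benchmark_result and _assert_evaluation_result, which are not part of
# this excerpt. The minimal sketches below are assumptions reconstructed from
# the call sites and the layout of benchmark.score_evaluation's result dict,
# not the module's verbatim helpers.
def _benchmark_result_helper(benchmark, **kwargs):
    # default to a single terminated CartPole episode of length 1 and reward 1
    defaults = dict(
        env_id='CartPole-v0',
        data_sources=[0],
        initial_reset_timestamps=[1],
        episode_lengths=[1],
        episode_rewards=[1],
        episode_types=['t'],
        timestamps=[2],
    )
    for key, value in defaults.items():
        kwargs.setdefault(key, value)
    return benchmark.score_evaluation(**kwargs)


def _assert_benchmark_result(result, score=None, solves=None):
    # check only the aggregate score and (optionally) the solved flag
    if score is not None:
        assert _is_close(np.mean(result['scores']), score), "result={}".format(result)
    if solves is not None:
        assert bool(np.all(result['solves'])) == solves, "result={}".format(result)


def _assert_evaluation_result(result, score=None, rewards=None, lengths=None):
    # like _assert_benchmark_result, but also compares the per-evaluation
    # reward and length arrays element-wise
    if score is not None:
        assert _is_close(np.mean(result['scores']), score), "result={}".format(result)
    if rewards is not None:
        assert _eq_list_of_arrays(result['rewards'], rewards), "result={}".format(result)
    if lengths is not None:
        assert _eq_list_of_arrays(result['lengths'], lengths), "result={}".format(result)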