def test_max_seconds():
    benchmark = registration.Benchmark(id='TestBenchmark-v0', scorer=scoring.TotalReward(), tasks=[
        {'env_id': 'CartPole-v0',
         'trials': 1,
         'max_seconds': 1,
        },
    ])

    # both episodes finish within the wall-clock budget, so both count
    benchmark_result = _benchmark_result_helper(benchmark,
        data_sources=[0, 0],
        episode_lengths=[100, 100],
        episode_rewards=[0, 100],
        episode_types=['t', 't'],
        timestamps=[1.5, 2])
    _assert_benchmark_result(benchmark_result, score=0.5)

    # make sure we only include the first result because of wall clock time
    benchmark_result = _benchmark_result_helper(benchmark,
        data_sources=[0, 0, 0],
        episode_lengths=[100, 100, 100],
        episode_rewards=[0, 100, 100],
        episode_types=['t', 't', 't'],
        timestamps=[2, 102, 202])
    _assert_benchmark_result(benchmark_result, score=0.0)
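# A minimal sketch of the wall-clock cutoff that test_max_seconds exercises,
# not the library's actual implementation. It assumes episodes are timed from
# an initial reset timestamp (hypothetically 1, which makes the arithmetic in
# the assertions above work out) and that an episode counts as long as its
# elapsed time stays within the task's max_seconds budget. The helper name
# and signature are hypothetical.
def _episodes_within_wall_clock(timestamps, max_seconds, initial_reset_timestamp=1):
    """Return the indices of episodes that finished inside the time budget."""
    return [i for i, t in enumerate(timestamps)
            if t - initial_reset_timestamp <= max_seconds]

# Under those assumptions: timestamps [1.5, 2] give elapsed times [0.5, 1.0],
# so both episodes count and the mean clipped reward is 0.5; timestamps
# [2, 102, 202] leave only the first episode (reward 0), hence score 0.0.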
def test_clip_scoring():
    benchmark = registration.Benchmark(id='TestBenchmark-v0', scorer=scoring.TotalReward(), tasks=[
        {'env_id': 'CartPole-v0',
         'trials': 1,
         'max_timesteps': 5,
        },
    ])

    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
    _assert_benchmark_result(benchmark_result, score=0.01)

    # test a successful run
    benchmark_result = _benchmark_result_helper(benchmark, episode_rewards=[100])
    _assert_benchmark_result(benchmark_result, score=1.0, solves=True)
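# A minimal sketch of the clipping behavior test_clip_scoring relies on, not
# gym's actual scorer: the episode's total reward is normalized by a
# hypothetical reward ceiling of 100 (consistent with the default run scoring
# 0.01 and a reward of 100 scoring 1.0 above) and clipped into [0, 1].
def _clipped_score(total_reward, reward_ceiling=100.0):
    """Normalize a raw episode return and clip it into [0, 1]."""
    return min(max(total_reward / reward_ceiling, 0.0), 1.0)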
reward_benchmark = registration.Benchmark(id='TestBenchmark-v0', scorer=scoring.TotalReward(), tasks=[
    {'env_id': 'CartPole-v0',
     'trials': 1,
     'max_timesteps': 5,
    },
    {'env_id': 'Pendulum-v0',
     'trials': 1,
     'max_timesteps': 5,
    },
])

def test_total_reward_evaluation_scoring():