def test_total_reward_max_seconds():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.TotalReward(),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_seconds': 1,
            },
        ])

    benchmark_result = _benchmark_result_helper(
        benchmark,
        data_sources=[0, 0],
        episode_lengths=[100, 100],
        episode_rewards=[0, 100],
        episode_types=['t', 't'],
        timestamps=[1.5, 2])
    _assert_benchmark_result(benchmark_result, score=0.5)

    # make sure we only include the first result because of wall clock time
    benchmark_result = _benchmark_result_helper(
        benchmark,
        data_sources=[0, 0, 0],
        episode_lengths=[100, 100, 100],
        episode_rewards=[0, 100, 100],
        episode_types=['t', 't', 't'],
        timestamps=[2, 102, 202])
    _assert_benchmark_result(benchmark_result, score=0.0)
def test_max_timesteps():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.TotalReward(),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 2,
            },
        ])

    benchmark_result = _benchmark_result_helper(
        benchmark,
        data_sources=[0, 0],
        episode_lengths=[1, 1],
        episode_rewards=[1, 1],
        episode_types=['t', 't'],
        timestamps=[2, 3])
    _assert_benchmark_result(benchmark_result, score=0.01)

    # make sure we only include the first result because of timesteps
    benchmark_result = _benchmark_result_helper(
        benchmark,
        data_sources=[0, 0, 0],
        episode_lengths=[1, 100, 100],
        episode_rewards=[1, 100, 100],
        episode_types=['t', 't', 't'],
        timestamps=[2, 102, 202])
    _assert_benchmark_result(benchmark_result, score=0.01, solves=False)
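# A minimal sketch (assumption, not this module's real helper): the
# _benchmark_result_helper calls in these tests are assumed to fill in
# single-episode defaults for anything not passed explicitly and forward the
# call to benchmark.score_evaluation. The name and default values below are
# hypothetical, for illustration only.
def _example_benchmark_result_helper(benchmark, **kwargs):
    defaults = dict(
        env_id='CartPole-v0',
        data_sources=[0],
        episode_lengths=[1],
        episode_rewards=[1],
        episode_types=['t'],
        timestamps=[2],
    )
    for key, value in defaults.items():
        kwargs.setdefault(key, value)
    # Assumes registration.Benchmark exposes score_evaluation(**kwargs); the
    # assertions in these tests only inspect the returned result object.
    return benchmark.score_evaluation(**kwargs)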
def test_total_reward_clip_scoring():
    benchmark = registration.Benchmark(
        id='TestBenchmark-v0',
        scorer=scoring.TotalReward(),
        tasks=[
            {
                'env_id': 'CartPole-v0',
                'trials': 1,
                'max_timesteps': 5,
            },
        ])

    # simple scoring
    benchmark_result = _benchmark_result_helper(benchmark)
    _assert_benchmark_result(benchmark_result, score=0.01)

    # test a successful run
    benchmark_result = _benchmark_result_helper(benchmark, episode_rewards=[100])
    _assert_benchmark_result(benchmark_result, score=1.0, solves=True)
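# Worked check (assumption): the scores asserted in the tests above are
# consistent with TotalReward normalizing each counted episode's reward
# against an assumed [0, 100] reward range for CartPole-v0 and averaging over
# the episodes that fall inside the time/step budget. The helper name below is
# hypothetical and only restates that arithmetic; it is not the scorer's code.
def _expected_total_reward_score(episode_rewards, reward_floor=0.0, reward_ceiling=100.0):
    clipped = [
        min(max((reward - reward_floor) / (reward_ceiling - reward_floor), 0.0), 1.0)
        for reward in episode_rewards
    ]
    return sum(clipped) / len(clipped)

# For example, _expected_total_reward_score([0, 100]) == 0.5 and
# _expected_total_reward_score([1]) == 0.01, matching the assertions above.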
            episode_types=['e', 't', 'e'],
            timestamps=[i + 2, i + 3, i + 4],
        ))

    scores = scoring.benchmark_aggregate_score(benchmark, benchmark_results)
    _assert_benchmark_score(
        scores,
        score=0.0004,
        num_envs_solved=0,
        summed_training_seconds=5.0,
        summed_task_wall_time=5.0,
        start_to_finish_seconds=3.0)

# Tests for total reward scoring
reward_benchmark = registration.Benchmark(
    id='TestBenchmark-v0',
    scorer=scoring.TotalReward(),
    tasks=[
        {
            'env_id': 'CartPole-v0',
            'trials': 1,
            'max_timesteps': 5,
        },
        {
            'env_id': 'Pendulum-v0',
            'trials': 1,
            'max_timesteps': 5,
        },
    ])

def test_total_reward_clip_scoring():