def test_database_runner(self):
    dbs = DatabaseSerializer(test_scenarios=4,
                             test_world_steps=5,
                             num_serialize_scenarios=2)
    dbs.process("data/database1")
    local_release_filename = dbs.release(version="test")

    db = BenchmarkDatabase(database_root=local_release_filename)
    evaluators = {"success": "EvaluatorGoalReached",
                  "collision": "EvaluatorCollisionEgoAgent",
                  "max_steps": "EvaluatorStepCount"}
    terminal_when = {"collision": lambda x: x,
                     "max_steps": lambda x: x > 2}
    params = ParameterServer()  # only for evaluated agents, not passed to scenario!
    behaviors_tested = {"IDM": BehaviorIDMClassic(params),
                        "Const": BehaviorConstantAcceleration(params)}

    benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                       evaluators=evaluators,
                                       terminal_when=terminal_when,
                                       behaviors=behaviors_tested,
                                       log_eval_avg_every=5)

    result = benchmark_runner.run()

    df = result.get_data_frame()
    print(df)
    self.assertEqual(len(df.index), 2 * 2 * 2)  # 2 behaviors * 2 serialized scenarios * 2 scenario sets

    groups = result.get_evaluation_groups()
    self.assertEqual(set(groups), set(["behavior", "scen_set"]))
def test_database_runner(self):
    dbs = DatabaseSerializer(test_scenarios=4,
                             test_world_steps=5,
                             num_serialize_scenarios=2)
    dbs.process("data/database1")
    local_release_filename = dbs.release(version="test")

    db = BenchmarkDatabase(database_root=local_release_filename)
    safe_dist_params = ParameterServer(log_if_default=True)
    test_python_params = ParameterServer(log_if_default=True)
    evaluators = {
        "success": "EvaluatorGoalReached",
        "collision": "EvaluatorCollisionEgoAgent",
        "max_steps": "EvaluatorStepCount",
        "safe_dist_lon": {
            "type": "EvaluatorDynamicSafeDist",
            "params": safe_dist_params
        },
        "safe_dist_lat": {
            "type": "EvaluatorStaticSafeDist",
            "params": safe_dist_params
        },
        "test_evaluator": {
            "type": "TestPythonEvaluator",
            "params": test_python_params
        },
        "test_evaluator_serializable": TestPythonEvaluatorSerializable()
    }
    terminal_when = {
        "collision": lambda x: x,
        "max_steps": lambda x: x > 2,
        "safe_dist_lon": lambda x: x
    }
    params = ParameterServer()  # only for evaluated agents, not passed to scenario!
    behaviors_tested = {
        "IDM": BehaviorIDMClassic(params),
        "Const": BehaviorConstantAcceleration(params)
    }

    EvaluationConfig.AddEvaluationModule("bark.benchmark.tests.test_evaluator")
    benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                       evaluators=evaluators,
                                       log_eval_avg_every=1,
                                       terminal_when=terminal_when,
                                       behaviors=behaviors_tested)

    result = benchmark_runner.run()

    df = result.get_data_frame()
    print(df)
    self.assertEqual(len(df.index), 2 * 2 * 2)  # 2 behaviors * 2 serialized scenarios * 2 scenario sets

    result.load_benchmark_configs()
    groups = result.get_evaluation_groups()
    self.assertEqual(set(groups), set(["behavior", "scen_set"]))
def test_database_runner_checkpoint(self):
    dbs = DatabaseSerializer(test_scenarios=4,
                             test_world_steps=5,
                             num_serialize_scenarios=10)
    dbs.process("data/database1")
    local_release_filename = dbs.release(version="test")

    db = BenchmarkDatabase(database_root=local_release_filename)
    evaluators = {"success": "EvaluatorGoalReached",
                  "collision": "EvaluatorCollisionEgoAgent",
                  "max_steps": "EvaluatorStepCount"}
    terminal_when = {"collision": lambda x: x,
                     "max_steps": lambda x: x > 2}
    params = ParameterServer()  # only for evaluated agents, not passed to scenario!
    behaviors_tested = {"IDM": BehaviorIDMClassic(params),
                        "Const": BehaviorConstantAcceleration(params)}

    benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                       evaluators=evaluators,
                                       terminal_when=terminal_when,
                                       behaviors=behaviors_tested,
                                       log_eval_avg_every=20,
                                       checkpoint_dir="checkpoints1/")

    # single run; a checkpoint is dumped after every 30 processed configs
    result = benchmark_runner.run(checkpoint_every=30)
    df = result.get_data_frame()
    print(df)
    self.assertEqual(len(df.index), 40)  # 2 behaviors * 10 serialized scenarios * 2 scenario sets

    # check twice: first, merge from the checkpoints ...
    merged_result = BenchmarkRunner.merge_checkpoint_benchmark_results(
        checkpoint_dir="checkpoints1/")
    df = merged_result.get_data_frame()
    self.assertEqual(len(df.index), 30)

    # ... second, load the already merged results
    self.assertTrue(os.path.exists(os.path.join("checkpoints1/merged_results.ckpnt")))
    merged_result = BenchmarkRunner.merge_checkpoint_benchmark_results(
        checkpoint_dir="checkpoints1/")
    df = merged_result.get_data_frame()
    self.assertEqual(len(df.index), 30)

    configs_to_run = BenchmarkRunner.get_configs_to_run(
        benchmark_runner.configs_to_run, merged_result)
    self.assertEqual(len(configs_to_run), 10)

    benchmark_runner2 = BenchmarkRunner(benchmark_database=db,
                                        evaluators=evaluators,
                                        terminal_when=terminal_when,
                                        behaviors=behaviors_tested,
                                        log_eval_avg_every=1,
                                        checkpoint_dir="checkpoints1/",
                                        merge_existing=True)

    result = benchmark_runner2.run(checkpoint_every=7)
    df = result.get_data_frame()
    print(df)
    self.assertEqual(len(df.index), 40)  # 2 behaviors * 10 serialized scenarios * 2 scenario sets

    # check that results are maintained in the existing result dump:
    # 30 from the previous run + 7 after the new checkpoint
    merged_result = BenchmarkRunner.merge_checkpoint_benchmark_results(
        checkpoint_dir="checkpoints1/")
    df = merged_result.get_data_frame()
    self.assertEqual(len(df.index), 37)
def test_database_multiprocessing_runner_checkpoint(self):
    dbs = DatabaseSerializer(test_scenarios=1,
                             test_world_steps=2,
                             num_serialize_scenarios=10)
    dbs.process("data/database1")
    local_release_filename = dbs.release(version="test")

    db = BenchmarkDatabase(database_root=local_release_filename)
    evaluators = {"success": "EvaluatorGoalReached",
                  "collision": "EvaluatorCollisionEgoAgent",
                  "max_steps": "EvaluatorStepCount"}
    terminal_when = {"collision": lambda x: x,
                     "max_steps": lambda x: x > 2}
    params = ParameterServer()  # only for evaluated agents, not passed to scenario!
    behaviors_tested = {"IDM": BehaviorIDMClassic(params),
                        "Const": BehaviorConstantAcceleration(params)}

    benchmark_runner = BenchmarkRunnerMP(benchmark_database=db,
                                         evaluators=evaluators,
                                         terminal_when=terminal_when,
                                         behaviors=behaviors_tested,
                                         log_eval_avg_every=10,
                                         num_cpus=4,
                                         checkpoint_dir="checkpoints2/",
                                         merge_existing=False)
    benchmark_runner.clear_checkpoint_dir()

    # single run; each worker dumps a checkpoint after every 3 processed configs
    result = benchmark_runner.run(checkpoint_every=3)
    df = result.get_data_frame()
    print(df)
    self.assertEqual(len(df.index), 40)  # 2 behaviors * 10 serialized scenarios * 2 scenario sets

    merged_result = BenchmarkRunner.merge_checkpoint_benchmark_results(
        checkpoint_dir="checkpoints2/")
    df = merged_result.get_data_frame()
    self.assertEqual(len(df.index), 4 * 9)
    # self.assertEqual(len(merged_result.get_histories()), 4 * 9)
    self.assertEqual(len(merged_result.get_benchmark_configs()), 4 * 9)

    configs_to_run = BenchmarkRunner.get_configs_to_run(
        benchmark_runner.configs_to_run, merged_result)
    self.assertEqual(len(configs_to_run), 4)
    ray.shutdown()

    benchmark_runner2 = BenchmarkRunnerMP(benchmark_database=db,
                                          evaluators=evaluators,
                                          terminal_when=terminal_when,
                                          behaviors=behaviors_tested,
                                          log_eval_avg_every=1,
                                          checkpoint_dir="checkpoints2/",
                                          merge_existing=True)

    result = benchmark_runner2.run(checkpoint_every=1)
    df = result.get_data_frame()
    print(df)
    self.assertEqual(len(df.index), 40)  # 2 behaviors * 10 serialized scenarios * 2 scenario sets

    # check that the existing results are incorporated into the merged result
    merged_result = BenchmarkRunner.merge_checkpoint_benchmark_results(
        checkpoint_dir="checkpoints2/")
    df = merged_result.get_data_frame()
    self.assertEqual(len(df.index), 40)
def test_database_runner_python_behavior(self):
    dbs = DatabaseSerializer(test_scenarios=4,
                             test_world_steps=5,
                             num_serialize_scenarios=2)
    dbs.process("data/database1")
    local_release_filename = dbs.release(version="test")

    db = BenchmarkDatabase(database_root=local_release_filename)
    safe_dist_params = ParameterServer(log_if_default=True)
    evaluators = {
        "success": "EvaluatorGoalReached",
        "collision": "EvaluatorCollisionEgoAgent",
        "max_steps": "EvaluatorStepCount",
        "safe_dist_lon": {
            "type": "EvaluatorDynamicSafeDist",
            "params": safe_dist_params
        },
        "safe_dist_lat": {
            "type": "EvaluatorStaticSafeDist",
            "params": safe_dist_params
        }
    }
    terminal_when = {
        "collision": lambda x: x,
        "max_steps": lambda x: x > 2,
        "safe_dist_lon": lambda x: x
    }
    params = ParameterServer()  # only for evaluated agents, not passed to scenario!
    behaviors_tested = {"python_behavior": PythonDistanceBehavior(params)}

    benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                       evaluators=evaluators,
                                       terminal_when=terminal_when,
                                       behaviors=behaviors_tested,
                                       log_eval_avg_every=5,
                                       deepcopy=False)
    benchmark_runner.clear_checkpoint_dir()

    result = benchmark_runner.run()

    df = result.get_data_frame()
    print(df)
    self.assertEqual(len(df.index), 1 * 2 * 2)  # 1 behavior * 2 serialized scenarios * 2 scenario sets

    result.load_benchmark_configs()
    groups = result.get_evaluation_groups()
    self.assertEqual(set(groups), set(["behavior", "scen_set"]))
def reset(self, training_env, num_episodes, max_episode_steps, agent):
    super(TrainingBenchmarkDatabase, self).reset(training_env, num_episodes,
                                                 max_episode_steps, agent)
    benchmark_configs = self.create_benchmark_configs(num_episodes)

    evaluators = default_training_evaluators()
    if self.evaluators:
        evaluators = {**self.evaluators, **evaluators}
    terminal_when = default_terminal_criteria(max_episode_steps)
    if self.terminal_when:
        terminal_when = {**self.terminal_when, **terminal_when}

    self.benchmark_runner = BenchmarkRunner(
        benchmark_configs=benchmark_configs,
        evaluators=evaluators,
        terminal_when=terminal_when,
        num_scenarios=num_episodes,
        log_eval_avg_every=100000000000,
        checkpoint_dir="checkpoints",
        merge_existing=False,
        deepcopy=False)
def reset(self, training_env, num_episodes, max_episode_steps, agent):
    super(TrainingBenchmarkDatabase, self).reset(training_env, num_episodes,
                                                 max_episode_steps, agent)
    evaluators = default_training_evaluators()
    if self.evaluators:
        evaluators = {**self.evaluators, **evaluators}
    terminal_when = default_terminal_criteria(max_episode_steps)
    if self.terminal_when:
        terminal_when = {**self.terminal_when, **terminal_when}

    self.benchmark_runner = BenchmarkRunner(
        benchmark_database=self.database,  # this has priority over scenario generation
        scenario_generation=self.training_env._scenario_generator,
        evaluators=evaluators,
        terminal_when=terminal_when,
        num_scenarios=num_episodes,
        log_eval_avg_every=100000000000,
        checkpoint_dir="checkpoints",
        merge_existing=False,
        deepcopy=False)
class TrainingBenchmarkDatabase(TrainingBenchmark):
    def __init__(self, benchmark_database=None, evaluators=None, terminal_when=None):
        self.database = benchmark_database
        self.evaluators = evaluators
        self.terminal_when = terminal_when

    def reset(self, training_env, num_episodes, max_episode_steps, agent):
        super(TrainingBenchmarkDatabase, self).reset(training_env, num_episodes,
                                                     max_episode_steps, agent)
        evaluators = default_training_evaluators()
        if self.evaluators:
            evaluators = {**self.evaluators, **evaluators}
        terminal_when = default_terminal_criteria(max_episode_steps)
        if self.terminal_when:
            terminal_when = {**self.terminal_when, **terminal_when}

        self.benchmark_runner = BenchmarkRunner(
            benchmark_database=self.database,  # this has priority over scenario generation
            scenario_generation=self.training_env._scenario_generator,
            evaluators=evaluators,
            terminal_when=terminal_when,
            num_scenarios=num_episodes,
            log_eval_avg_every=100000000000,
            checkpoint_dir="checkpoints",
            merge_existing=False,
            deepcopy=False)

    def run(self):
        mean_return, formatting = super(TrainingBenchmarkDatabase, self).run()
        eval_result = self.benchmark_runner.run()
        data_frame = eval_result.get_data_frame()
        data_frame["max_steps"] = data_frame.Terminal.apply(
            lambda x: "max_steps" in x and (not "collision" in x))
        data_frame["success"] = data_frame.Terminal.apply(
            lambda x: "success" in x and (not "collision" in x) and
            (not "max_steps" in x))
        data_frame = data_frame.drop(columns=[
            "scen_set", "scen_idx", "behavior", "Terminal", "step", "config_idx"
        ])
        mean = data_frame.mean(axis=0)
        eval_result = {**mean.to_dict(), **mean_return}
        return eval_result, f"Benchmark Result: {eval_result}"

    def is_better(self, eval_result1, than_eval_result2):
        pass
def test_database_runner_checkpoint(self):
    dbs = DatabaseSerializer(test_scenarios=4,
                             test_world_steps=5,
                             num_serialize_scenarios=10)
    dbs.process("data/database1")
    local_release_filename = dbs.release(version="test")

    db = BenchmarkDatabase(database_root=local_release_filename)
    evaluators = {
        "success": "EvaluatorGoalReached",
        "collision": "EvaluatorCollisionEgoAgent",
        "max_steps": "EvaluatorStepCount"
    }
    terminal_when = {
        "collision": lambda x: x,
        "max_steps": lambda x: x > 2
    }
    params = ParameterServer()  # only for evaluated agents, not passed to scenario!
    behaviors_tested = {
        "IDM": BehaviorIDMClassic(params),
        "Const": BehaviorConstantAcceleration(params)
    }

    benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                       evaluators=evaluators,
                                       terminal_when=terminal_when,
                                       behaviors=behaviors_tested,
                                       log_eval_avg_every=20,
                                       checkpoint_dir="checkpoints1/")
    benchmark_runner.clear_checkpoint_dir()

    # single run; a checkpoint is dumped after every 30 processed configs
    result = benchmark_runner.run(checkpoint_every=30)
    df = result.get_data_frame()
    print(df)
    self.assertEqual(len(df.index), 40)  # 2 behaviors * 10 serialized scenarios * 2 scenario sets

    # check merging from the checkpoints
    merged_result = BenchmarkRunner.merge_checkpoint_benchmark_results(
        checkpoint_dir="checkpoints1/")
    df = merged_result.get_data_frame()
    self.assertEqual(len(df.index), 40)
class TrainingBenchmarkDatabase(TrainingBenchmark):
    def __init__(self, benchmark_database=None, evaluators=None, terminal_when=None):
        self.database = benchmark_database
        self.evaluators = evaluators
        self.terminal_when = terminal_when

    def create_benchmark_configs(self, num_scenarios):
        benchmark_configs = []
        if self.database:
            for scenario_generator, scenario_set_name, scenario_set_param_desc in self.database:
                benchmark_configs.extend(self.benchmark_configs_from_scen_gen(
                    scenario_generator, scenario_set_name,
                    scenario_set_param_desc, num_scenarios))
        else:
            scenario_generator = self.training_env._scenario_generator
            benchmark_configs.extend(self.benchmark_configs_from_scen_gen(
                scenario_generator, "training_env",
                {}, num_scenarios))
        return benchmark_configs

    def benchmark_configs_from_scen_gen(self, scenario_generator, scenario_set_name,
                                        scenario_set_param_desc, num_scenarios):
        benchmark_configs = []
        for scenario, scenario_idx in scenario_generator:
            if num_scenarios and scenario_idx >= num_scenarios:
                break
            behavior_config = BehaviorConfig("agent", self.agent, None)
            benchmark_config = BenchmarkConfig(
                len(benchmark_configs),
                behavior_config,
                scenario,
                scenario_idx,
                scenario_set_name,
                scenario_set_param_desc)
            benchmark_configs.append(benchmark_config)
        return benchmark_configs

    def reset(self, training_env, num_episodes, max_episode_steps, agent):
        super(TrainingBenchmarkDatabase, self).reset(training_env, num_episodes,
                                                     max_episode_steps, agent)
        benchmark_configs = self.create_benchmark_configs(num_episodes)

        evaluators = default_training_evaluators()
        if self.evaluators:
            evaluators = {**self.evaluators, **evaluators}
        terminal_when = default_terminal_criteria(max_episode_steps)
        if self.terminal_when:
            terminal_when = {**self.terminal_when, **terminal_when}

        self.benchmark_runner = BenchmarkRunner(
            benchmark_configs=benchmark_configs,
            evaluators=evaluators,
            terminal_when=terminal_when,
            num_scenarios=num_episodes,
            log_eval_avg_every=100000000000,
            checkpoint_dir="checkpoints",
            merge_existing=False,
            deepcopy=False)

    def run(self):
        mean_return, formatting = super(TrainingBenchmarkDatabase, self).run()
        eval_result = self.benchmark_runner.run()
        data_frame = eval_result.get_data_frame()
        data_frame["max_steps"] = data_frame.Terminal.apply(
            lambda x: "max_steps" in x and (not "collision" in x))
        data_frame["success"] = data_frame.Terminal.apply(
            lambda x: "success" in x and (not "collision" in x) and
            (not "max_steps" in x))
        data_frame = data_frame.drop(columns=[
            "scen_set", "scen_idx", "behavior", "Terminal", "step", "config_idx"
        ])
        mean = data_frame.mean(axis=0)
        eval_result = {**mean.to_dict(), **mean_return}
        return eval_result, f"Benchmark Result: {eval_result}"

    def is_better(self, eval_result1, than_eval_result2):
        pass
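# Minimal usage sketch for the class above, not a definitive API example:
# "data/database1_release" is a hypothetical release path, and training_env /
# agent are placeholders assumed to be supplied by the surrounding RL training
# loop. Only constructor arguments and methods visible in the snippets above
# are used.
db = BenchmarkDatabase(database_root="data/database1_release")
training_benchmark = TrainingBenchmarkDatabase(
    benchmark_database=db,
    evaluators={"collision": "EvaluatorCollisionEgoAgent"},
    terminal_when={"collision": lambda x: x})

# The training loop is expected to drive the benchmark roughly like this:
# training_benchmark.reset(training_env, num_episodes=10,
#                          max_episode_steps=50, agent=agent)
# eval_result, formatted_msg = training_benchmark.run()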
try:
    from bark.core.models.behavior import BehaviorUCTSingleAgent
    behavior_used = BehaviorUCTSingleAgent
except ImportError:
    print("BehaviorUCTSingleAgent not available, rerun the example with "
          "`bazel run //examples:benchmark_database --define planner_uct=true`")
    exit()

db = BenchmarkDatabase(database_root="external/benchmark_database_release")
evaluators = {
    "success": EvaluatorGoalReached,
    "collision": EvaluatorCollisionEgoAgent,
    "max_steps": EvaluatorStepCount
}
terminal_when = {"collision": lambda x: x, "max_steps": lambda x: x > 2}

scenario_param_file = "uct_planner.json"  # must be within the examples params folder
params = ParameterServer(
    filename=os.path.join("examples/params/", scenario_param_file))
behaviors_tested = {"search5s": behavior_used(params)}

benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                   evaluators=evaluators,
                                   terminal_when=terminal_when,
                                   behaviors=behaviors_tested)

benchmark_runner.run(1)

benchmark_runner.dataframe.to_pickle("uct_planner_results.pickle")
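# Follow-up sketch: the pickle written above stores benchmark_runner.dataframe,
# i.e. a pandas DataFrame, so it can be reloaded for offline inspection. The
# grouping columns "behavior" and "scen_set" are assumed to be present, matching
# the evaluation groups asserted in the tests above.
import pandas as pd

results_df = pd.read_pickle("uct_planner_results.pickle")
print(results_df.head())
# Aggregate the numeric evaluator results per behavior and scenario set.
print(results_df.groupby(["behavior", "scen_set"]).mean(numeric_only=True))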