Example #1
    def test_database_runner(self):
        dbs = DatabaseSerializer(test_scenarios=4, test_world_steps=5, num_serialize_scenarios=2)
        dbs.process("data/database1")
        local_release_filename = dbs.release(version="test")

        db = BenchmarkDatabase(database_root=local_release_filename)
        evaluators = {"success" : "EvaluatorGoalReached", "collision" : "EvaluatorCollisionEgoAgent",
                      "max_steps": "EvaluatorStepCount"}
        terminal_when = {"collision" :lambda x: x, "max_steps": lambda x : x>2}
        params = ParameterServer() # only for evaluated agents not passed to scenario!
        behaviors_tested = {"IDM": BehaviorIDMClassic(params), "Const" : BehaviorConstantAcceleration(params)}
                                        

        benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                           evaluators=evaluators,
                                           terminal_when=terminal_when,
                                           behaviors=behaviors_tested,
                                           log_eval_avg_every=5)

        result = benchmark_runner.run()
        df = result.get_data_frame()
        print(df)
        self.assertEqual(len(df.index), 2*2*2)  # 2 behaviors * 2 serialized scenarios * 2 scenario sets

        groups = result.get_evaluation_groups()
        self.assertEqual(set(groups), set(["behavior", "scen_set"]))
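The BenchmarkResult returned by run() wraps a plain pandas dataframe, so the evaluation columns can be aggregated per evaluation group. A minimal sketch of such a post-processing step (not part of the test above), assuming the evaluator columns "collision" and "success" end up in the dataframe as configured in the evaluators dict:

df = result.get_data_frame()
# average collision and success rates per behavior and scenario set
summary = df.groupby(["behavior", "scen_set"])[["collision", "success"]].mean()
print(summary)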
    def test_database_runner(self):
        dbs = DatabaseSerializer(test_scenarios=4,
                                 test_world_steps=5,
                                 num_serialize_scenarios=2)
        dbs.process("data/database1")
        local_release_filename = dbs.release(version="test")

        db = BenchmarkDatabase(database_root=local_release_filename)
        safe_dist_params = ParameterServer(log_if_default=True)
        test_python_params = ParameterServer(log_if_default=True)
        evaluators = {
            "success": "EvaluatorGoalReached",
            "collision": "EvaluatorCollisionEgoAgent",
            "max_steps": "EvaluatorStepCount",
            "safe_dist_lon": {
                "type": "EvaluatorDynamicSafeDist",
                "params": safe_dist_params
            },
            "safe_dist_lat": {
                "type": "EvaluatorStaticSafeDist",
                "params": safe_dist_params
            },
            "test_evaluator": {
                "type": "TestPythonEvaluator",
                "params": test_python_params
            },
            "test_evaluator_serializable": TestPythonEvaluatorSerializable()
        }
        terminal_when = {
            "collision": lambda x: x,
            "max_steps": lambda x: x > 2,
            "safe_dist_lon": lambda x: x
        }
        # only for the evaluated agents; not passed to the scenario
        params = ParameterServer()
        behaviors_tested = {
            "IDM": BehaviorIDMClassic(params),
            "Const": BehaviorConstantAcceleration(params)
        }

        EvaluationConfig.AddEvaluationModule(
            "bark.benchmark.tests.test_evaluator")
        benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                           evaluators=evaluators,
                                           log_eval_avg_every=1,
                                           terminal_when=terminal_when,
                                           behaviors=behaviors_tested)

        result = benchmark_runner.run()
        df = result.get_data_frame()
        print(df)
        self.assertEqual(
            len(df.index),
            2 * 2 * 2)  # 2 behaviors * 2 serialized scenarios * 2 scenario sets
        result.load_benchmark_configs()
        groups = result.get_evaluation_groups()
        self.assertEqual(set(groups), set(["behavior", "scen_set"]))
Example #3
    def test_database_runner_checkpoint(self):
        dbs = DatabaseSerializer(test_scenarios=4, test_world_steps=5, num_serialize_scenarios=10)
        dbs.process("data/database1")
        local_release_filename = dbs.release(version="test")

        db = BenchmarkDatabase(database_root=local_release_filename)
        evaluators = {"success" : "EvaluatorGoalReached", "collision" : "EvaluatorCollisionEgoAgent",
                      "max_steps": "EvaluatorStepCount"}
        terminal_when = {"collision" :lambda x: x, "max_steps": lambda x : x>2}
        params = ParameterServer() # only for evaluated agents not passed to scenario!
        behaviors_tested = {"IDM": BehaviorIDMClassic(params), "Const" : BehaviorConstantAcceleration(params)}
                                        

        benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                           evaluators=evaluators,
                                           terminal_when=terminal_when,
                                           behaviors=behaviors_tested,
                                           log_eval_avg_every=20,
                                           checkpoint_dir="checkpoints1/")

        # one run; after every 30 processed configs the results are dumped to a checkpoint
        result = benchmark_runner.run(checkpoint_every=30)
        df = result.get_data_frame()
        print(df)
        self.assertEqual(len(df.index), 40) # 2 Behaviors * 10 Serialize Scenarios * 2 scenario sets
        # check twice: first, merge the results from the checkpoints
        merged_result = BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir="checkpoints1/")
        df = merged_result.get_data_frame()
        self.assertEqual(len(df.index), 30)
        # second, load the already merged results
        self.assertTrue(os.path.exists(os.path.join("checkpoints1/merged_results.ckpnt")))
        merged_result = BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir="checkpoints1/")
        df = merged_result.get_data_frame()
        self.assertEqual(len(df.index), 30)

        configs_to_run = BenchmarkRunner.get_configs_to_run(benchmark_runner.configs_to_run, merged_result)
        self.assertEqual(len(configs_to_run), 10)

        benchmark_runner2 = BenchmarkRunner(benchmark_database=db,
                                           evaluators=evaluators,
                                           terminal_when=terminal_when,
                                           behaviors=behaviors_tested,
                                           log_eval_avg_every=1,
                                           checkpoint_dir="checkpoints1/",
                                           merge_existing=True)

        result = benchmark_runner2.run(checkpoint_every=7)
        df = result.get_data_frame()
        print(df)
        self.assertEqual(len(df.index), 40) # 2 Behaviors * 10 Serialize Scenarios * 2 scenario sets

        # check that results are maintained in the existing result dump:
        # 30 from the previous run + 7 after the new checkpoint
        merged_result = BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir="checkpoints1/")
        df = merged_result.get_data_frame()
        self.assertEqual(len(df.index), 37)
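Outside of a unit test, the same checkpoint mechanism can be used to resume an interrupted benchmark. A minimal sketch under the assumption that db, evaluators, terminal_when, behaviors_tested and benchmark_runner are defined as in the test above and that "checkpoints1/" already contains partial results:

# merge whatever checkpoints have been written so far and determine the remaining work
merged = BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir="checkpoints1/")
remaining = BenchmarkRunner.get_configs_to_run(benchmark_runner.configs_to_run, merged)
print(f"{len(remaining)} benchmark configs still to run")

# a new runner with merge_existing=True continues from the merged results
resume_runner = BenchmarkRunner(benchmark_database=db,
                                evaluators=evaluators,
                                terminal_when=terminal_when,
                                behaviors=behaviors_tested,
                                log_eval_avg_every=10,
                                checkpoint_dir="checkpoints1/",
                                merge_existing=True)
result = resume_runner.run(checkpoint_every=10)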
Example #4
    def test_database_multiprocessing_runner_checkpoint(self):
        dbs = DatabaseSerializer(test_scenarios=1, test_world_steps=2, num_serialize_scenarios=10)
        dbs.process("data/database1")
        local_release_filename = dbs.release(version="test")

        db = BenchmarkDatabase(database_root=local_release_filename)
        evaluators = {"success" : "EvaluatorGoalReached", "collision" : "EvaluatorCollisionEgoAgent",
                      "max_steps": "EvaluatorStepCount"}
        terminal_when = {"collision" :lambda x: x, "max_steps": lambda x : x>2}
        params = ParameterServer() # only for evaluated agents not passed to scenario!
        behaviors_tested = {"IDM": BehaviorIDMClassic(params), "Const" : BehaviorConstantAcceleration(params)}
                                        

        benchmark_runner = BenchmarkRunnerMP(benchmark_database=db,
                                           evaluators=evaluators,
                                           terminal_when=terminal_when,
                                           behaviors=behaviors_tested,
                                           log_eval_avg_every=10,
                                           num_cpus=4,
                                           checkpoint_dir="checkpoints2/",
                                           merge_existing=False)
        benchmark_runner.clear_checkpoint_dir()
        # one run; after every 3 processed configs the results are dumped to a checkpoint
        result = benchmark_runner.run(checkpoint_every=3)
        df = result.get_data_frame()
        print(df)
        self.assertEqual(len(df.index), 40) # 2 Behaviors * 10 Serialize Scenarios * 2 scenario sets

        merged_result = BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir="checkpoints2/")
        df = merged_result.get_data_frame()
        self.assertEqual(len(df.index), 4*9)
        # self.assertEqual(len(merged_result.get_histories()), 4*9)
        self.assertEqual(len(merged_result.get_benchmark_configs()), 4*9)

        configs_to_run = BenchmarkRunner.get_configs_to_run(benchmark_runner.configs_to_run, merged_result)
        self.assertEqual(len(configs_to_run), 4)
        ray.shutdown()
        benchmark_runner2 = BenchmarkRunnerMP(benchmark_database=db,
                                           evaluators=evaluators,
                                           terminal_when=terminal_when,
                                           behaviors=behaviors_tested,
                                           log_eval_avg_every=1,
                                           checkpoint_dir="checkpoints2/",
                                           merge_existing=True)

        result = benchmark_runner2.run(checkpoint_every=1)
        df = result.get_data_frame()
        print(df)
        self.assertEqual(len(df.index), 40) # 2 Behaviors * 10 Serialize Scenarios * 2 scenario sets

        # check that the existing result is incorporated when merging results
        merged_result = BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir="checkpoints2/")
        df = merged_result.get_data_frame()
        self.assertEqual(len(df.index), 40)
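Stripped of the test assertions, a standalone multi-process run boils down to a few lines. A rough sketch reusing the db, evaluators, terminal_when and behaviors_tested objects from above; num_cpus and the checkpoint directory name are free choices:

runner_mp = BenchmarkRunnerMP(benchmark_database=db,
                              evaluators=evaluators,
                              terminal_when=terminal_when,
                              behaviors=behaviors_tested,
                              log_eval_avg_every=10,
                              num_cpus=4,
                              checkpoint_dir="checkpoints_mp/",
                              merge_existing=False)
result = runner_mp.run(checkpoint_every=10)
print(result.get_data_frame())
ray.shutdown()  # release the ray workers once the run is finished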
    def test_database_runner_python_behavior(self):
        dbs = DatabaseSerializer(test_scenarios=4,
                                 test_world_steps=5,
                                 num_serialize_scenarios=2)
        dbs.process("data/database1")
        local_release_filename = dbs.release(version="test")

        db = BenchmarkDatabase(database_root=local_release_filename)
        safe_dist_params = ParameterServer(log_if_default=True)
        evaluators = {
            "success": "EvaluatorGoalReached",
            "collision": "EvaluatorCollisionEgoAgent",
            "max_steps": "EvaluatorStepCount",
            "safe_dist_lon": {
                "type": "EvaluatorDynamicSafeDist",
                "params": safe_dist_params
            },
            "safe_dist_lat": {
                "type": "EvaluatorStaticSafeDist",
                "params": safe_dist_params
            }
        }
        terminal_when = {
            "collision": lambda x: x,
            "max_steps": lambda x: x > 2,
            "safe_dist_lon": lambda x: x
        }
        # only for the evaluated agents; not passed to the scenario
        params = ParameterServer()
        behaviors_tested = {"python_behavior": PythonDistanceBehavior(params)}

        benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                           evaluators=evaluators,
                                           terminal_when=terminal_when,
                                           behaviors=behaviors_tested,
                                           log_eval_avg_every=5,
                                           deepcopy=False)
        benchmark_runner.clear_checkpoint_dir()
        result = benchmark_runner.run()
        df = result.get_data_frame()
        print(df)
        self.assertEqual(
            len(df.index),
            1 * 2 * 2)  # 1 behavior * 2 serialized scenarios * 2 scenario sets
        result.load_benchmark_configs()
        groups = result.get_evaluation_groups()
        self.assertEqual(set(groups), set(["behavior", "scen_set"]))
    def reset(self, training_env, num_episodes, max_episode_steps, agent):
        super(TrainingBenchmarkDatabase, self).reset(training_env, num_episodes, \
                                           max_episode_steps, agent)
        benchmark_configs = self.create_benchmark_configs(num_episodes)
        evaluators = default_training_evaluators()
        if self.evaluators:
            evaluators = {**self.evaluators, **evaluators}
        terminal_when = default_terminal_criteria(max_episode_steps)
        if self.terminal_when:
            terminal_when = {**self.terminal_when, **terminal_when}
        self.benchmark_runner = BenchmarkRunner(
            benchmark_configs=benchmark_configs,
            evaluators=evaluators,
            terminal_when=terminal_when,
            num_scenarios=num_episodes,
            log_eval_avg_every=100000000000,
            checkpoint_dir="checkpoints",
            merge_existing=False,
            deepcopy=False)
    def reset(self, training_env, num_episodes, max_episode_steps, agent):
        super(TrainingBenchmarkDatabase, self).reset(training_env, num_episodes, \
                                           max_episode_steps, agent)
        evaluators = default_training_evaluators()
        if self.evaluators:
            evaluators = {**self.evaluators, **evaluators}
        terminal_when = default_terminal_criteria(max_episode_steps)
        if self.terminal_when:
            terminal_when = {**self.terminal_when, **terminal_when}
        self.benchmark_runner = BenchmarkRunner(
            # this has priority over scenario generation
            benchmark_database=self.database,
            scenario_generation=self.training_env._scenario_generator,
            evaluators=evaluators,
            terminal_when=terminal_when,
            num_scenarios=num_episodes,
            log_eval_avg_every=100000000000,
            checkpoint_dir="checkpoints",
            merge_existing=False,
            deepcopy=False)
class TrainingBenchmarkDatabase(TrainingBenchmark):
    def __init__(self,
                 benchmark_database=None,
                 evaluators=None,
                 terminal_when=None):
        self.database = benchmark_database
        self.evaluators = evaluators
        self.terminal_when = terminal_when

    def reset(self, training_env, num_episodes, max_episode_steps, agent):
        super(TrainingBenchmarkDatabase, self).reset(training_env, num_episodes, \
                                           max_episode_steps, agent)
        evaluators = default_training_evaluators()
        if self.evaluators:
            evaluators = {**self.evaluators, **evaluators}
        terminal_when = default_terminal_criteria(max_episode_steps)
        if self.terminal_when:
            terminal_when = {**self.terminal_when, **terminal_when}
        self.benchmark_runner = BenchmarkRunner(
            # this has priority over scenario generation
            benchmark_database=self.database,
            scenario_generation=self.training_env._scenario_generator,
            evaluators=evaluators,
            terminal_when=terminal_when,
            num_scenarios=num_episodes,
            log_eval_avg_every=100000000000,
            checkpoint_dir="checkpoints",
            merge_existing=False,
            deepcopy=False)

    def run(self):
        mean_return, formatting = super(TrainingBenchmarkDatabase, self).run()
        eval_result = self.benchmark_runner.run()
        data_frame = eval_result.get_data_frame()
        data_frame["max_steps"] = data_frame.Terminal.apply(
            lambda x: "max_steps" in x and (not "collision" in x))
        data_frame["success"] = data_frame.Terminal.apply(
            lambda x: "success" in x and (not "collision" in x) and
            (not "max_steps" in x))
        data_frame = data_frame.drop(columns=[
            "scen_set", "scen_idx", "behavior", "Terminal", "step",
            "config_idx"
        ])
        mean = data_frame.mean(axis=0)
        eval_result = {**mean.to_dict(), **mean_return}
        return eval_result, f"Benchmark Result: {eval_result}"

    def is_better(self, eval_result1, than_eval_result2):
        pass
    def test_database_runner_checkpoint(self):
        dbs = DatabaseSerializer(test_scenarios=4,
                                 test_world_steps=5,
                                 num_serialize_scenarios=10)
        dbs.process("data/database1")
        local_release_filename = dbs.release(version="test")

        db = BenchmarkDatabase(database_root=local_release_filename)
        evaluators = {
            "success": "EvaluatorGoalReached",
            "collision": "EvaluatorCollisionEgoAgent",
            "max_steps": "EvaluatorStepCount"
        }
        terminal_when = {
            "collision": lambda x: x,
            "max_steps": lambda x: x > 2
        }
        # only for the evaluated agents; not passed to the scenario
        params = ParameterServer()
        behaviors_tested = {
            "IDM": BehaviorIDMClassic(params),
            "Const": BehaviorConstantAcceleration(params)
        }

        benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                           evaluators=evaluators,
                                           terminal_when=terminal_when,
                                           behaviors=behaviors_tested,
                                           log_eval_avg_every=20,
                                           checkpoint_dir="checkpoints1/")
        benchmark_runner.clear_checkpoint_dir()
        # one run; after every 30 processed configs the results are dumped to a checkpoint
        result = benchmark_runner.run(checkpoint_every=30)
        df = result.get_data_frame()
        print(df)
        self.assertEqual(
            len(df.index),
            40)  # 2 Behaviors * 10 Serialize Scenarios * 2 scenario sets
        # check twice first, merging from checkpoints
        merged_result = BenchmarkRunner.merge_checkpoint_benchmark_results(
            checkpoint_dir="checkpoints1/")
        df = merged_result.get_data_frame()
        self.assertEqual(len(df.index), 40)
class TrainingBenchmarkDatabase(TrainingBenchmark):
    def __init__(self,
                 benchmark_database=None,
                 evaluators=None,
                 terminal_when=None):
        self.database = benchmark_database
        self.evaluators = evaluators
        self.terminal_when = terminal_when

    def create_benchmark_configs(self, num_scenarios):
        benchmark_configs = []
        if self.database:
            for scenario_generator, scenario_set_name, scenario_set_param_desc in self.database:
                benchmark_configs.extend(self.benchmark_configs_from_scen_gen( \
                                        scenario_generator, scenario_set_name, \
                                          scenario_set_param_desc, num_scenarios))
        else:
            scenario_generator = self.training_env._scenario_generator
            benchmark_configs.extend(self.benchmark_configs_from_scen_gen(
                                      scenario_generator, "training_env", \
                                        {}, num_scenarios))
        return benchmark_configs

    def benchmark_configs_from_scen_gen(self, scenario_generator, scenario_set_name, \
                                         scenario_set_param_desc, num_scenarios):
        benchmark_configs = []
        for scenario, scenario_idx in scenario_generator:
            if num_scenarios and scenario_idx >= num_scenarios:
                break
            behavior_config = BehaviorConfig("agent", self.agent, None)
            benchmark_config = \
                        BenchmarkConfig(
                            len(benchmark_configs),
                            behavior_config,
                            scenario,
                            scenario_idx,
                            scenario_set_name,
                            scenario_set_param_desc
                        )
            benchmark_configs.append(benchmark_config)
        return benchmark_configs

    def reset(self, training_env, num_episodes, max_episode_steps, agent):
        super(TrainingBenchmarkDatabase, self).reset(training_env, num_episodes, \
                                           max_episode_steps, agent)
        benchmark_configs = self.create_benchmark_configs(num_episodes)
        evaluators = default_training_evaluators()
        if self.evaluators:
            evaluators = {**self.evaluators, **evaluators}
        terminal_when = default_terminal_criteria(max_episode_steps)
        if self.terminal_when:
            terminal_when = {**self.terminal_when, **terminal_when}
        self.benchmark_runner = BenchmarkRunner(
            benchmark_configs=benchmark_configs,
            evaluators=evaluators,
            terminal_when=terminal_when,
            num_scenarios=num_episodes,
            log_eval_avg_every=100000000000,
            checkpoint_dir="checkpoints",
            merge_existing=False,
            deepcopy=False)

    def run(self):
        mean_return, formatting = super(TrainingBenchmarkDatabase, self).run()
        eval_result = self.benchmark_runner.run()
        data_frame = eval_result.get_data_frame()
        data_frame["max_steps"] = data_frame.Terminal.apply(
            lambda x: "max_steps" in x and (not "collision" in x))
        data_frame["success"] = data_frame.Terminal.apply(
            lambda x: "success" in x and (not "collision" in x) and
            (not "max_steps" in x))
        data_frame = data_frame.drop(columns=[
            "scen_set", "scen_idx", "behavior", "Terminal", "step",
            "config_idx"
        ])
        mean = data_frame.mean(axis=0)
        eval_result = {**mean.to_dict(), **mean_return}
        return eval_result, f"Benchmark Result: {eval_result}"

    def is_better(self, eval_result1, than_eval_result2):
        pass
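A rough usage sketch for the class above; my_training_env and my_agent are hypothetical placeholders for an actual training environment and trained agent, and db is a BenchmarkDatabase as in the earlier examples:

# my_training_env and my_agent are placeholders, not defined in this snippet
training_benchmark = TrainingBenchmarkDatabase(
    benchmark_database=db,
    evaluators={"collision": "EvaluatorCollisionEgoAgent"},
    terminal_when={"collision": lambda x: x})
training_benchmark.reset(training_env=my_training_env,
                         num_episodes=10,
                         max_episode_steps=50,
                         agent=my_agent)
eval_result, formatting = training_benchmark.run()
print(formatting)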
Example #11
try:
    from bark.core.models.behavior import BehaviorUCTSingleAgent
    behavior_used = BehaviorUCTSingleAgent
except ImportError:
    print(
        "BehaviorUCTSingleAgent not available, rerun the example with "
        "`bazel run //examples:benchmark_database --define planner_uct=true`"
    )
    exit()

db = BenchmarkDatabase(database_root="external/benchmark_database_release")
evaluators = {
    "success": EvaluatorGoalReached,
    "collision": EvaluatorCollisionEgoAgent,
    "max_steps": EvaluatorStepCount
}

terminal_when = {"collision": lambda x: x, "max_steps": lambda x: x > 2}

scenario_param_file = "uct_planner.json"  # must be within examples params folder
params = ParameterServer(
    filename=os.path.join("examples/params/", scenario_param_file))
behaviors_tested = {"search5s": behavior_used(params)}

benchmark_runner = BenchmarkRunner(benchmark_database=db,
                                   evaluators=evaluators,
                                   terminal_when=terminal_when,
                                   behaviors=behaviors_tested)

benchmark_runner.run(1)
benchmark_runner.dataframe.to_pickle("uct_planner_results.pickle")
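The pickled dataframe can later be reloaded and summarized with plain pandas. A minimal sketch, assuming the evaluator columns and the scen_set grouping column are present as in the other examples:

import pandas as pd

results = pd.read_pickle("uct_planner_results.pickle")
# average collision and success rates of the UCT planner per scenario set
print(results.groupby("scen_set")[["collision", "success"]].mean())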