Example 1
 def test_dump_and_load_results(self):
     result_data = random_result_data(size = 10)
     br = BenchmarkResult(result_dict=result_data)
     br.dump("./results")
     br_loaded = BenchmarkResult.load("./results")
     loaded_dict = br_loaded.get_result_dict()
     self.assertEqual(result_data, loaded_dict)
Example 2
    def run(self, viewer=None, maintain_history=False, checkpoint_every=None):
        results = []
        histories = {}
        for idx, bmark_conf in enumerate(self.configs_to_run):
            self.logger.info(
                "Running config idx {} being {}/{}: Scenario {} of set \"{}\" for behavior \"{}\""
                .format(bmark_conf.config_idx, idx,
                        len(self.benchmark_configs) - 1,
                        bmark_conf.scenario_idx, bmark_conf.scenario_set_name,
                        bmark_conf.behavior_config.behavior_name))
            bmark_conf = copy.deepcopy(
                bmark_conf) if self._deepcopy else bmark_conf
            result_dict, scenario_history = self._run_benchmark_config(
                bmark_conf, viewer, maintain_history)
            results.append(result_dict)
            histories[bmark_conf.config_idx] = scenario_history
            if self.log_eval_avg_every and (idx +
                                            1) % self.log_eval_avg_every == 0:
                self._log_eval_average(results, self.configs_to_run)

            if checkpoint_every and (idx + 1) % checkpoint_every == 0:
                intermediate_result = BenchmarkResult(results, \
                         self.configs_to_run[0:idx+1], histories=histories)
                checkpoint_file = os.path.join(self.checkpoint_dir,
                                               self.get_checkpoint_file_name())
                intermediate_result.dump(checkpoint_file,
                                         dump_configs=True,
                                         dump_histories=maintain_history)
                self.logger.info("Saved checkpoint {}".format(checkpoint_file))
        benchmark_result = BenchmarkResult(results,
                                           self.configs_to_run,
                                           histories=histories)
        self.existing_benchmark_result.extend(benchmark_result)
        return self.existing_benchmark_result
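A minimal usage sketch for the loop above (hedged: `runner` is assumed to be a BenchmarkRunner built as in Example 4; the directory name and checkpoint interval are illustrative):

    # run all remaining configs, keep scenario histories, checkpoint every 10 configs
    result = runner.run(maintain_history=True, checkpoint_every=10)
    # persist the accumulated result, as the tests above do
    result.dump("./results", dump_configs=True, dump_histories=True)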
Example 3
    def _log_eval_average(self, result_dct_list, configs):
        bresult = BenchmarkResult(result_dct_list, configs)
        df = bresult.get_data_frame()
        grouped = df.apply(pd.to_numeric, errors='ignore').groupby(bresult.get_evaluation_groups()).mean()[
            self._evaluation_criteria()]
        self.logger.info("\n------------------- Current Evaluation Results ---------------------- \n Num. Results:{}\n {} \n \
---------------------------------------------------------------------".format(len(result_dct_list),
                                                                              grouped.to_string()))
Example 4
    def __init__(self,
                 benchmark_database=None,
                 evaluators=None,
                 terminal_when=None,
                 behaviors=None,
                 behavior_configs=None,
                 num_scenarios=None,
                 benchmark_configs=None,
                 scenario_generation=None,
                 logger_name=None,
                 log_eval_avg_every=None,
                 checkpoint_dir=None,
                 merge_existing=False,
                 deepcopy=True):

        self.benchmark_database = benchmark_database
        self.scenario_generation = scenario_generation
        self.evaluators = evaluators if isinstance(evaluators, EvaluationConfig) else EvaluationConfig(evaluators)
        self.terminal_when = terminal_when or []
        if behaviors:
          self.behavior_configs = BehaviorConfig.configs_from_dict(behaviors)
        else:
          self.behavior_configs = behavior_configs or {}
        if benchmark_configs:
          self.benchmark_configs = benchmark_configs
        elif benchmark_database:
          self.benchmark_configs = \
                                 self._create_configurations_from_database(num_scenarios)
        elif scenario_generation:
          self.benchmark_configs = \
                                  self._create_configurations_from_scenario_generation(num_scenarios)
        else:
          self.benchmark_configs = [] # stays compatible when the benchmark runner is used inside a ray actor

        self.logger = logging.getLogger(logger_name or "BenchmarkRunner")
        self.logger.setLevel(logging.DEBUG)
        self.logger.info("Total number of {} configs to run".format(len(self.benchmark_configs)))
        self.configs_to_run = self.benchmark_configs
        self._deepcopy = deepcopy
        self.checkpoint_dir = checkpoint_dir or "checkpoints"
        self.existing_benchmark_result = BenchmarkResult(file_name= \
                    BenchmarkRunner.get_merged_result_filename(self.checkpoint_dir))
        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)

        if merge_existing:
            self.existing_benchmark_result = \
                BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir)
            self.logger.info("Merged {} processed configs in folder {}". \
                format(len(self.existing_benchmark_result.get_benchmark_configs()), checkpoint_dir))
            self.configs_to_run = self.get_configs_to_run(self.benchmark_configs, \
                                                            self.existing_benchmark_result)
            self.logger.info("Remaining  number of {} configs to run".format(len(self.configs_to_run)))

        self.exceptions_caught = []
        self.log_eval_avg_every = log_eval_avg_every
Example 5
    def _log_eval_average(self, result_dct_list, configs):
        bresult = BenchmarkResult(result_dct_list, configs)
        df = bresult.get_data_frame()
        for eval_group in bresult.get_evaluation_groups():
          if eval_group not in df.columns:
            df[eval_group] = np.nan
        df.fillna(-1, inplace=True)
        grouped = df.apply(pd.to_numeric, errors='ignore').groupby(bresult.get_evaluation_groups()).mean()
        self.logger.info("\n------------------- Current Evaluation Results ---------------------- \n Num. Results:{}\n {} \n \
---------------------------------------------------------------------".format(len(result_dct_list),
                                                                              grouped.to_string()))
Example 6
 def test_dump_and_load_histories_one(self):
     result_num = 2
     result_data = random_result_data(size = result_num)
     histories = random_history_data(result_num, 20)
     br = BenchmarkResult(result_dict=result_data, histories=histories)
     br.dump("./results_with_history", dump_histories=True, max_mb_per_file = 2)
     br_loaded = BenchmarkResult.load("./results_with_history")
     br_loaded.load_histories(config_idx_list = list(histories.keys()))
     loaded_histories = br_loaded.get_histories()
     self.assertEqual(histories, loaded_histories)
     loaded_dict = br_loaded.get_result_dict()
     self.assertEqual(result_data, loaded_dict)
Example 7
    def merge_checkpoint_benchmark_results(checkpoint_dir):
        checkpoint_files = glob.glob(os.path.join(checkpoint_dir,
                                                  "**/*.ckpnt"),
                                     recursive=True)
        merged_result = BenchmarkResult()
        # merge all checkpoints with new results
        for checkpoint_file in checkpoint_files:
            logging.info("Loading checkpoint {}".format(
                os.path.abspath(checkpoint_file)))
            next_result = BenchmarkResult.load(os.path.abspath(checkpoint_file), \
                load_configs=True, load_histories=True)
            merged_result.extend(next_result)
        # dump merged result
        if len(merged_result.get_result_dict()) > 0:
            logging.info("Dumping merged result")
            merged_result_filename = os.path.join(checkpoint_dir,
                                                  "merged_results.ckpnt")
            merged_result.dump(merged_result_filename, \
                dump_configs=True, dump_histories=True)

        # delete checkpoints
        for checkpoint_file in checkpoint_files:
            if checkpoint_file == merged_result_filename:
                continue
            os.remove(checkpoint_file)
            logging.info(
                "Removed old checkpoint file {}".format(checkpoint_file))
        return merged_result
Example 8
def dump():
    ckpt_dir = [
        os.path.join('/home/ekumar/master_thesis/code/hythe-src/checkpoints/',
                     ck) for ck in
        os.listdir("/home/ekumar/master_thesis/code/hythe-src/checkpoints/")
    ]
    eval_criteria = {"goal_reached": lambda x: x}
    print(ckpt_dir)
    demos_dir = os.path.join('/home/ekumar/demos/')
    list_of_demos = []
    for cdir in ckpt_dir:
        print(f"Extracting result {cdir}")
        result = BenchmarkResult.load_results(cdir)
        democ = DemonstrationCollector()
        democ._collection_result = result
        democ._directory = demos_dir
        demos = democ.ProcessCollectionResult(eval_criteria)
        list_of_demos.extend(demos)
    # make the demonstrations dir in the exp root
    os.makedirs(
        "/home/ekumar/output/experiments/exp_c76fc949-e95f-4774-91ba-6bec575ada37/demonstrations/generated_demonstrations"
    )
    to_pickle(
        list_of_demos,
        "/home/ekumar/output/experiments/exp_c76fc949-e95f-4774-91ba-6bec575ada37/demonstrations/generated_demonstrations",
        "demonstrations")
    collector = DemonstrationCollector.load(
        "/home/ekumar/output/experiments/exp_c76fc949-e95f-4774-91ba-6bec575ada37/demonstrations/generated_demonstrations"
    )
    print("Total demonstations found:",
          len(collector.GetDemonstrationExperiences()))
    return
Example 9
    def get_configs_to_run(benchmark_configs, existing_benchmark_result):
        existing_inds = existing_benchmark_result.get_benchmark_config_indices()
        required_inds = BenchmarkResult(benchmark_configs=benchmark_configs).get_benchmark_config_indices()
        missing_inds = list(set(required_inds) - set(existing_inds))

        filtered_configs = filter(lambda bc : bc.config_idx in missing_inds, benchmark_configs)
        return list(filtered_configs)
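An illustrative sketch of the filtering (constructors follow Example 10; it assumes, as the `required_inds` line above already does, that a BenchmarkResult built only from configs reports their indices):

    behaviorc = BehaviorConfig("behavior_name", "behavior")
    confs = [BenchmarkConfig(i, behaviorc, "test scenario", i, "scenario_set_name")
             for i in range(5)]
    # pretend configs 0 and 2 were already processed in an earlier run
    existing = BenchmarkResult(benchmark_configs=[confs[0], confs[2]])
    remaining = BenchmarkRunner.get_configs_to_run(confs, existing)
    # expected: configs with config_idx 1, 3 and 4 remain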
Example 10
def dummy_benchmark_results():
    br = [
        {
            "config_idx": 24,
            "collision": False,
            "rss": True,
            "behavior": "test1",
            "scen_idx": 12
        },
        {
            "config_idx": 1,
            "collision": True,
            "rss": False,
            "behavior": "test",
            "scen_idx": 10
        },
        {
            "config_idx": 500,
            "collision": True,
            "rss": True,
            "behavior": "test",
            "scen_idx": 112
        },
    ]
    behaviorc = BehaviorConfig("behavior_name", "behavior")
    benchmarkc = BenchmarkConfig(0, behaviorc, "test scenario", 0,
                                 "scenario_set_name")
    return BenchmarkResult(result_dict=br, benchmark_configs=[benchmarkc])
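A short sketch of inspecting such a result (methods as used elsewhere in these examples; the data frame is expected to hold one row per result dict above):

    br_result = dummy_benchmark_results()
    df = br_result.get_data_frame()
    print(df.to_string())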
Example 11
 def run_benchmark_config(self, config_idx, **kwargs):
     for idx, bmark_conf in enumerate(self.benchmark_configs):
         if bmark_conf.config_idx == config_idx:
             result_dict, scenario_history = self._run_benchmark_config(copy.deepcopy(bmark_conf), **kwargs)
             return BenchmarkResult(result_dict, [bmark_conf], histories={config_idx : scenario_history})
     self.logger.error("Config idx {} not found in benchmark configs. Skipping...".format(config_idx))
     return
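A usage sketch (hedged: `runner` is an initialized runner; config index 24 and the keyword values are illustrative, the kwargs are forwarded to _run_benchmark_config):

    single_result = runner.run_benchmark_config(24, viewer=None, maintain_history=True)
    if single_result is not None:
        print(single_result.get_result_dict())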
Example 12
    def __init__(self,
                 benchmark_database=None,
                 evaluators=None,
                 terminal_when=None,
                 behaviors=None,
                 behavior_configs=None,
                 num_scenarios=None,
                 benchmark_configs=None,
                 logger_name=None,
                 log_eval_avg_every=None,
                 checkpoint_dir=None,
                 merge_existing=False):

        self.benchmark_database = benchmark_database
        self.evaluators = evaluators or {}
        self.terminal_when = terminal_when or []
        if behaviors:
          self.behavior_configs = BehaviorConfig.configs_from_dict(behaviors)
        else:
          self.behavior_configs = behavior_configs or {}
        self.benchmark_configs = benchmark_configs or \
                                 self._create_configurations(num_scenarios)

        self.logger = logging.getLogger(logger_name or "BenchmarkRunner")
        self.logger.setLevel(logging.DEBUG)
        self.logger.info("Total number of {} configs to run".format(len(self.benchmark_configs)))
        self.existing_benchmark_result = BenchmarkResult()
        self.configs_to_run = self.benchmark_configs

        self.checkpoint_dir = checkpoint_dir or "checkpoints"
        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)

        if merge_existing:
            self.existing_benchmark_result = \
                BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir)
            self.logger.info("Merged {} processed configs in folder {}". \
                format(len(self.existing_benchmark_result.get_benchmark_configs()), checkpoint_dir)) 
            self.configs_to_run = self.get_configs_to_run(self.benchmark_configs, \
                                                            self.existing_benchmark_result)
            self.logger.info("Remaining  number of {} configs to run".format(len(self.configs_to_run)))

        self.exceptions_caught = []
        self.log_eval_avg_every = log_eval_avg_every
Example 13
    def test_make_scenarios_congruent(self):
        brst = BenchmarkResult(result_dict=dummy_benchmark_results(),
                               benchmark_configs=None)
        analyzer = BenchmarkAnalyzer(benchmark_result=brst)

        # scenarios 5 and 12 in all lists
        congruent_list = analyzer.make_scenarios_congruent(
            configs_idx_lists=[[3, 2, 500, 11], [35, 1, 41],
                               [500, 41, 12, 121]])
        self.assertEqual(congruent_list[0], [11, 2])
Example 14
    def test_dump_and_partial_load(self):
        result_num = 100
        confs, result_data, histories = random_benchmark_conf_data(result_num, 2000000, hist_size=1500000)
        br = BenchmarkResult(result_dict=result_data,
          benchmark_configs=confs, histories=histories)
        br.dump("./results_all", dump_configs=True, dump_histories=True, max_mb_per_file = 5)
        br_loaded = BenchmarkResult.load("./results_all")
        loaded_dict = br_loaded.get_result_dict()
        self.assertEqual(br.get_result_dict(), loaded_dict)

        loaded_configs_idx = list(range(10, 20))
        processed_files = br_loaded.load_benchmark_configs(config_idx_list = loaded_configs_idx)
        loaded_confs = br_loaded.get_benchmark_configs()
        self.assertEqual(len(loaded_confs), 10)
        # 2 MB per conf, max 5 MB per file -> 2 confs per file -> 10/2 = 5 files
        self.assertEqual(len(processed_files), 5) 
        for conf_idx in loaded_configs_idx:
            self.assertEqual(br_loaded.get_benchmark_config(conf_idx), confs[conf_idx])

        loaded_configs_idx = list(range(10, 27))
        processed_files = br_loaded.load_histories(config_idx_list = loaded_configs_idx)
        loaded_histories = br_loaded.get_histories()
        self.assertEqual(len(loaded_histories), 18) # one more than specified since it was in the last file
        # 1.5 MB per history, max 5 MB per file -> 3 histories per file -> 17/3 = 6 files
        self.assertEqual(len(processed_files), 6)
        for conf_idx in loaded_configs_idx:
            self.assertEqual(br_loaded.get_history(conf_idx), histories[conf_idx])
Example 15
 def load(directory):
     collector = DemonstrationCollector()
     collection_result_fullname = os.path.join(
         directory, DemonstrationCollector.collection_result_filename())
     if os.path.exists(collection_result_fullname):
         collector._collection_result = BenchmarkResult.load(
             collection_result_fullname)
     demonstration_fullname = os.path.join(
         directory, DemonstrationCollector.demonstrations_filename())
     if os.path.exists(demonstration_fullname):
         collector._demonstrations = from_pickle(
             directory, DemonstrationCollector.demonstrations_filename())
     collector._directory = directory
     return collector
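A minimal sketch of restoring a collector and reading its demonstrations, mirroring the calls in Example 8 (the directory path is illustrative):

    collector = DemonstrationCollector.load("./demonstrations/generated_demonstrations")
    print("Total demonstrations found:",
          len(collector.GetDemonstrationExperiences()))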
Example 16
    def test_find_config_idx(self):
        brst = BenchmarkResult(result_dict=dummy_benchmark_results(),
                               benchmark_configs=None)
        analyzer = BenchmarkAnalyzer(benchmark_result=brst)

        configs_found = analyzer.find_configs({"collision": lambda x: x})
        self.assertEqual(configs_found, [1, 500, 11])

        configs_found = analyzer.find_configs({"collision": lambda x: not x})
        self.assertEqual(configs_found, [24, 2, 41, 3, 12, 121, 35, 42])

        configs_found = analyzer.find_configs({"metric1": lambda x: x == 0.1})
        self.assertEqual(configs_found, [24, 11])

        configs_found = analyzer.find_configs({
            "collision": lambda x: not x,
            "metric1": lambda x: x == 0.1
        })
        self.assertEqual(configs_found, [24])

        configs_found = analyzer.find_configs({
            "collision": lambda x: x,
            "metric1": lambda x: x > 1
        })
        self.assertEqual(configs_found, [1, 500])

        configs_found = analyzer.find_configs({
            "collision":
            lambda x: x,
            "behavior":
            lambda x: x == "test2"
        })
        self.assertEqual(configs_found, [])

        configs_found = analyzer.find_configs(scenario_idx_list=[4, 10, 7])
        self.assertEqual(configs_found, [121, 1, 42])

        configs_found = analyzer.find_configs({"collision": lambda x: not x},
                                              scenario_idx_list=[4, 10, 7])
        self.assertEqual(configs_found, [121, 42])

        configs_found = analyzer.find_configs(
            {"collision": lambda x: x},
            scenarios_as_in_configs=[24, 11, 121, 1])
        self.assertEqual(configs_found, [11, 1])

        configs_found = analyzer.find_configs(scenario_idx_list=[4, 10, 7],
                                              in_configs=[1, 42])
        self.assertEqual(configs_found, [1, 42])
Example 17
 def test_dump_and_load_benchmark_configs(self):
     result_num = 100
     confs, result_data, _ = random_benchmark_conf_data(result_num, 2000000)
     br = BenchmarkResult(result_dict=result_data, benchmark_configs=confs)
     br.dump("./results_with_confs", dump_configs=True, max_mb_per_file = 5)
     br_loaded = BenchmarkResult.load("./results_with_confs")
     br_loaded.load_benchmark_configs(config_idx_list = list(range(0, result_num)))
     loaded_confs = br_loaded.get_benchmark_configs()
     self.assertEqual(confs, loaded_confs)
     loaded_dict = br_loaded.get_result_dict()
     self.assertEqual(br.get_result_dict(), loaded_dict)
Example 18
 def run(self, viewer=None, maintain_history=False, checkpoint_every=None):
     results_tmp = ray.get([
         actor.run.remote(viewer, maintain_history, checkpoint_every)
         for actor in self.actors
     ])
     result_dict = []
     benchmark_configs = []
     histories = {}
     for result_tmp in results_tmp:
         result_dict.extend(result_tmp.get_result_dict())
         benchmark_configs.extend(result_tmp.get_benchmark_configs())
         histories.update(result_tmp.get_histories())
     benchmark_result = BenchmarkResult(result_dict,
                                        benchmark_configs,
                                        histories=histories)
     self.existing_benchmark_result.extend(benchmark_result)
     return self.existing_benchmark_result
Example 19
    def merge_checkpoint_benchmark_results(checkpoint_dir):
        checkpoint_files = glob.glob(os.path.join(checkpoint_dir, "**/*.ckpnt"), recursive=True)
        merged_result_filename = BenchmarkRunner.get_merged_result_filename(checkpoint_dir)
        if os.path.exists(merged_result_filename):
          merged_result = BenchmarkResult.load_results(filename=merged_result_filename)
        else:
          merged_result = BenchmarkResult(file_name=merged_result_filename)
        # merge all checkpoints with new results
        for checkpoint_file in checkpoint_files:
          loaded_result = BenchmarkResult.load(os.path.abspath(checkpoint_file))
          merged_result.extend(loaded_result, file_level=True)
          logging.info("Extending with checkpoint {}".format(checkpoint_file))

        # delete checkpoints
        for checkpoint_file in checkpoint_files:
          if "merged_result" in checkpoint_file:
            continue
          os.remove(checkpoint_file)
          logging.info("Removed old checkpoint file {}".format(checkpoint_file))
        return merged_result
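A sketch of the intended call pattern (the checkpoint directory name is illustrative):

    merged = BenchmarkRunner.merge_checkpoint_benchmark_results("checkpoints")
    print("Merged configs:", len(merged.get_benchmark_configs()))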
Example 20
    def test_database_run_and_analyze(self):
        dbs = DatabaseSerializer(test_scenarios=2, test_world_steps=3, num_serialize_scenarios=2)
        # to find database files
        cwd = os.getcwd()
        if not debug:
          os.chdir("../benchmark_database/")
        else:
          os.chdir("bazel-bin/bark/benchmark/tests/py_benchmark_process_tests.runfiles/benchmark_database")
        dbs.process("data/database1")
        local_release_filename = dbs.release(version="test")

        db = BenchmarkDatabase(database_root=local_release_filename)
        evaluators = {"success" : "EvaluatorGoalReached", "collision" : "EvaluatorCollisionEgoAgent",
                      "max_steps": "EvaluatorStepCount"}
        terminal_when = {"collision" :lambda x: x, "max_steps": lambda x : x>2}
        params = ParameterServer() # only for the evaluated agents, not passed to the scenario!
        behaviors_tested = {"IDM": BehaviorIDMClassic(params), "Const" : BehaviorConstantAcceleration(params)}

        benchmark_runner = BenchmarkRunnerMP(benchmark_database=db,
                                           evaluators=evaluators,
                                           terminal_when=terminal_when,
                                           behaviors=behaviors_tested,
                                           log_eval_avg_every=2)

        result = benchmark_runner.run(maintain_history=True)

        result.dump(os.path.join("./benchmark_results"), dump_configs=True, \
                         dump_histories=True, max_mb_per_file=1)
        result_loaded = BenchmarkResult.load(os.path.join("./benchmark_results"))
        result_loaded.load_histories()
        result_loaded.load_benchmark_configs()

        params2 = ParameterServer()

        fig = plt.figure(figsize=[10, 10])
        viewer = MPViewer(
              params=params2,
              center=[5112, 5165],
              y_length = 120,
              enforce_y_length=True,
              axis = fig.gca())

        analyzer = BenchmarkAnalyzer(benchmark_result=result_loaded)
        configs = analyzer.find_configs(criteria={"behavior": lambda x: x=="IDM", "success": lambda x : not x})
        configs_const = analyzer.find_configs(criteria={"behavior": lambda x: x=="Const", "success": lambda x : not x})

        os.chdir(cwd)
        #analyzer.visualize(configs_idx_list = configs,
                         # viewer = viewer, real_time_factor=10, fontsize=12)
        plt.close(fig)

        
        fig, (ax1, ax2) = plt.subplots(1, 2)
        viewer1 = MPViewer(
              params=params2,
              center=[5112, 5165],
              y_length = 120,
              enforce_y_length=True,
              axis = ax1)
        viewer2 = MPViewer(
              params=params2,
              center=[5112, 5165],
              y_length = 120,
              enforce_y_length=True,
              axis = ax2)
        analyzer.visualize(configs_idx_list = [configs[1:3], configs_const[1:3]],
                          viewer = [viewer1, viewer2], viewer_names=["IDM", "ConstVelocity"], real_time_factor=10, fontsize=12)
Example 21
class BenchmarkRunner:
    def __init__(self,
                 benchmark_database=None,
                 evaluators=None,
                 terminal_when=None,
                 behaviors=None,
                 behavior_configs=None,
                 num_scenarios=None,
                 benchmark_configs=None,
                 logger_name=None,
                 log_eval_avg_every=None,
                 checkpoint_dir=None,
                 merge_existing=False,
                 deepcopy=True):

        self.benchmark_database = benchmark_database
        self.evaluators = evaluators or {}
        self.terminal_when = terminal_when or []
        if behaviors:
            self.behavior_configs = BehaviorConfig.configs_from_dict(behaviors)
        else:
            self.behavior_configs = behavior_configs or {}
        self.benchmark_configs = benchmark_configs or \
                                 self._create_configurations(num_scenarios)

        self.logger = logging.getLogger(logger_name or "BenchmarkRunner")
        self.logger.setLevel(logging.DEBUG)
        self.logger.info("Total number of {} configs to run".format(
            len(self.benchmark_configs)))
        self.existing_benchmark_result = BenchmarkResult()
        self.configs_to_run = self.benchmark_configs
        self._deepcopy = deepcopy
        self.checkpoint_dir = checkpoint_dir or "checkpoints"
        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)

        if merge_existing:
            self.existing_benchmark_result = \
                BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir)
            self.logger.info("Merged {} processed configs in folder {}". \
                format(len(self.existing_benchmark_result.get_benchmark_configs()), checkpoint_dir))
            self.configs_to_run = self.get_configs_to_run(self.benchmark_configs, \
                                                            self.existing_benchmark_result)
            self.logger.info("Remaining  number of {} configs to run".format(
                len(self.configs_to_run)))

        self.exceptions_caught = []
        self.log_eval_avg_every = log_eval_avg_every

    def get_checkpoint_file_name(self):
        return "benchmark_runner.ckpnt"

    def clear_checkpoint_dir(self):
        files = glob.glob(os.path.join(self.checkpoint_dir, "*.ckpnt"))
        for f in files:
            os.remove(f)

    @staticmethod
    def merge_checkpoint_benchmark_results(checkpoint_dir):
        checkpoint_files = glob.glob(os.path.join(checkpoint_dir,
                                                  "**/*.ckpnt"),
                                     recursive=True)
        merged_result = BenchmarkResult()
        # merge all checkpoints with new results
        for checkpoint_file in checkpoint_files:
            logging.info("Loading checkpoint {}".format(
                os.path.abspath(checkpoint_file)))
            next_result = BenchmarkResult.load(os.path.abspath(checkpoint_file), \
                load_configs=True, load_histories=True)
            merged_result.extend(next_result)
        # dump merged result
        if len(merged_result.get_result_dict()) > 0:
            logging.info("Dumping merged result")
            merged_result_filename = os.path.join(checkpoint_dir,
                                                  "merged_results.ckpnt")
            merged_result.dump(merged_result_filename, \
                dump_configs=True, dump_histories=True)

        # delete checkpoints
        for checkpoint_file in checkpoint_files:
            if checkpoint_file == merged_result_filename:
                continue
            os.remove(checkpoint_file)
            logging.info(
                "Removed old checkpoint file {}".format(checkpoint_file))
        return merged_result

    @staticmethod
    def get_configs_to_run(benchmark_configs, existing_benchmark_result):
        existing_inds = existing_benchmark_result.get_benchmark_config_indices(
        )
        required_inds = BenchmarkResult(benchmark_configs=benchmark_configs
                                        ).get_benchmark_config_indices()
        missing_inds = list(set(required_inds) - set(existing_inds))

        filtered_configs = filter(lambda bc: bc.config_idx in missing_inds,
                                  benchmark_configs)
        return list(filtered_configs)

    def _create_configurations(self, num_scenarios=None):
        benchmark_configs = []
        for behavior_config in self.behavior_configs:
            # run over all scenario generators from benchmark database
            for scenario_generator, scenario_set_name, scenario_set_param_desc in self.benchmark_database:
                for scenario, scenario_idx in scenario_generator:
                    if num_scenarios and scenario_idx >= num_scenarios:
                        break
                    benchmark_config = \
                        BenchmarkConfig(
                            len(benchmark_configs),
                            behavior_config,
                            scenario,
                            scenario_idx,
                            scenario_set_name,
                            scenario_set_param_desc
                        )
                    benchmark_configs.append(benchmark_config)
        return benchmark_configs

    def run(self, viewer=None, maintain_history=False, checkpoint_every=None):
        results = []
        histories = {}
        for idx, bmark_conf in enumerate(self.configs_to_run):
            self.logger.info(
                "Running config idx {} being {}/{}: Scenario {} of set \"{}\" for behavior \"{}\""
                .format(bmark_conf.config_idx, idx,
                        len(self.benchmark_configs) - 1,
                        bmark_conf.scenario_idx, bmark_conf.scenario_set_name,
                        bmark_conf.behavior_config.behavior_name))
            bmark_conf = copy.deepcopy(
                bmark_conf) if self._deepcopy else bmark_conf
            result_dict, scenario_history = self._run_benchmark_config(
                bmark_conf, viewer, maintain_history)
            results.append(result_dict)
            histories[bmark_conf.config_idx] = scenario_history
            if self.log_eval_avg_every and (idx +
                                            1) % self.log_eval_avg_every == 0:
                self._log_eval_average(results, self.configs_to_run)

            if checkpoint_every and (idx + 1) % checkpoint_every == 0:
                intermediate_result = BenchmarkResult(results, \
                         self.configs_to_run[0:idx+1], histories=histories)
                checkpoint_file = os.path.join(self.checkpoint_dir,
                                               self.get_checkpoint_file_name())
                intermediate_result.dump(checkpoint_file,
                                         dump_configs=True,
                                         dump_histories=maintain_history)
                self.logger.info("Saved checkpoint {}".format(checkpoint_file))
        benchmark_result = BenchmarkResult(results,
                                           self.configs_to_run,
                                           histories=histories)
        self.existing_benchmark_result.extend(benchmark_result)
        return self.existing_benchmark_result

    def run_benchmark_config(self, config_idx, **kwargs):
        for idx, bmark_conf in enumerate(self.benchmark_configs):
            if bmark_conf.config_idx == config_idx:
                bmark_conf = copy.deepcopy(
                    bmark_conf) if self._deepcopy else bmark_conf
                result_dict, scenario_history = self._run_benchmark_config(
                    bmark_conf, **kwargs)
                return BenchmarkResult(
                    result_dict, [bmark_conf],
                    histories={config_idx: scenario_history})
        self.logger.error(
            "Config idx {} not found in benchmark configs. Skipping...".format(
                config_idx))
        return

    def _run_benchmark_config(self,
                              benchmark_config,
                              viewer=None,
                              maintain_history=False):
        scenario = benchmark_config.scenario
        behavior = benchmark_config.behavior_config.behavior
        parameter_server = ParameterServer(json=scenario._json_params)
        scenario_history = []
        step = 0
        try:
            world = scenario.GetWorldState()
        except Exception as e:
            self.logger.error(
                "For config-idx {}, Exception thrown in scenario.GetWorldState: {}"
                .format(benchmark_config.config_idx, e))
            self._append_exception(benchmark_config, e)
            return {
                **benchmark_config.as_dict(), "step": step,
                "Terminal": "exception_raised"
            }

        # if behavior is not None (None means the scenario's default behavior model is evaluated)
        if behavior:
            world.agents[scenario._eval_agent_ids[0]].behavior_model = behavior
        if maintain_history:
            self._append_to_scenario_history(scenario_history, world, scenario)
        self._reset_evaluators(world, scenario._eval_agent_ids)
        step_time = parameter_server["Simulation"]["StepTime", "", 0.2]
        if not isinstance(step_time, float):
            step_time = 0.2
        terminal = False
        terminal_why = None
        while not terminal:
            try:
                evaluation_dict = self._get_evalution_dict(world)
            except Exception as e:
                self.logger.error(
                    "For config-idx {}, Exception thrown in evaluation: {}".
                    format(benchmark_config.config_idx, e))
                terminal_why = "exception_raised"
                self._append_exception(benchmark_config, e)
                evaluation_dict = {}
                break
            terminal, terminal_why = self._is_terminal(evaluation_dict)
            if not terminal:
                if viewer:
                    viewer.drawWorld(
                        world,
                        scenario._eval_agent_ids,
                        scenario_idx=benchmark_config.scenario_idx)
                    viewer.show(block=False)
                    time.sleep(step_time)
                    viewer.clear()
                try:
                    world.Step(step_time)
                except Exception as e:
                    self.logger.error(
                        "For config-idx {}, Exception thrown in world.Step: {}"
                        .format(benchmark_config.config_idx, e))
                    terminal_why = "exception_raised"
                    self._append_exception(benchmark_config, e)
                    break

                if maintain_history:
                    self._append_to_scenario_history(scenario_history, world,
                                                     scenario)
                step += 1

        dct = {
            **benchmark_config.as_dict(), "step": step,
            **evaluation_dict, "Terminal": terminal_why
        }

        return dct, scenario_history

    def _append_to_scenario_history(self, scenario_history, world, scenario):
        scenario = Scenario(agent_list=list(world.agents.values()),
                            map_file_name=scenario.map_file_name,
                            eval_agent_ids=scenario.eval_agent_ids,
                            json_params=scenario.json_params)
        scenario_history.append(scenario.copy())

    def _append_exception(self, benchmark_config, exception):
        self.exceptions_caught.append((benchmark_config.config_idx, exception))

    def _reset_evaluators(self, world, eval_agent_ids):
        for evaluator_name, evaluator_params in self.evaluators.items():
            evaluator_bark = None
            if isinstance(evaluator_params, str):
                try:
                    evaluator_bark = eval(
                        "{}(eval_agent_ids[0])".format(evaluator_params))
                except:
                    evaluator_bark = eval("{}()".format(evaluator_params))
            elif isinstance(evaluator_params, dict):
                evaluator_bark = eval(
                    "{}(agent_id=eval_agent_ids[0], **evaluator_params['params'])"
                    .format(evaluator_params["type"]))
            else:
                raise ValueError
            world.AddEvaluator(evaluator_name, evaluator_bark)

    def _evaluation_criteria(self):
        bark_evals = [eval_crit for eval_crit, _ in self.evaluators.items()]
        bark_evals.append("step")
        return bark_evals

    def _get_evalution_dict(self, world):
        return world.Evaluate()

    def _is_terminal(self, evaluation_dict):
        terminal = False
        terminal_why = []
        for evaluator_name, function in self.terminal_when.items():
            if function(evaluation_dict[evaluator_name]):
                terminal = True
                terminal_why.append(evaluator_name)
        return terminal, terminal_why

    def _log_eval_average(self, result_dct_list, configs):
        bresult = BenchmarkResult(result_dct_list, configs)
        df = bresult.get_data_frame()
        for eval_group in bresult.get_evaluation_groups():
            if eval_group not in df.columns:
                df[eval_group] = np.nan
        df.fillna(-1, inplace=True)
        grouped = df.apply(pd.to_numeric, errors='ignore').groupby(
            bresult.get_evaluation_groups()).mean()[
                self._evaluation_criteria()]
        self.logger.info(
            "\n------------------- Current Evaluation Results ---------------------- \n Num. Results:{}\n {} \n \
---------------------------------------------------------------------".format(
                len(result_dct_list), grouped.to_string()))
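A condensed construction-and-run sketch for the class above, mirroring the setup in Example 20 (hedged: `db` and `behaviors_tested` are assumed to exist as in that test; evaluator names and numeric values are illustrative):

    evaluators = {"success": "EvaluatorGoalReached",
                  "collision": "EvaluatorCollisionEgoAgent",
                  "max_steps": "EvaluatorStepCount"}
    terminal_when = {"collision": lambda x: x, "max_steps": lambda x: x > 50}
    runner = BenchmarkRunner(benchmark_database=db,
                             evaluators=evaluators,
                             terminal_when=terminal_when,
                             behaviors=behaviors_tested,
                             log_eval_avg_every=10)
    result = runner.run(maintain_history=True, checkpoint_every=10)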
Example 22
    def run(self, viewer=None, maintain_history=False, checkpoint_every=None):
        last_results = []
        last_histories = {}
        last_run_configs = []
        results = []
        checkpoint_file = os.path.abspath(os.path.join(self.checkpoint_dir, self.get_checkpoint_file_name()))
        last_result_file = os.path.abspath(os.path.join(self.checkpoint_dir, "tmp_{}".format(self.get_checkpoint_file_name())))
        checkpoint_result = BenchmarkResult(file_name=checkpoint_file)
        for idx, bmark_conf in enumerate(self.configs_to_run):
            self.logger.info("Running config idx {} being {}/{}: Scenario {} of set \"{}\" for behavior \"{}\"".format(
                bmark_conf.config_idx, idx, len(self.benchmark_configs) - 1, bmark_conf.scenario_idx,
                bmark_conf.scenario_set_name, bmark_conf.behavior_config.behavior_name))
            bmark_conf = copy.deepcopy(bmark_conf) if self._deepcopy else bmark_conf
            result_dict, scenario_history = self._run_benchmark_config(bmark_conf, viewer,
                                                                       maintain_history)
            results.append(result_dict)
            last_results.append(result_dict)
            last_histories[bmark_conf.config_idx] = scenario_history
            last_run_configs.append(bmark_conf)
            if self.log_eval_avg_every and (idx + 1) % self.log_eval_avg_every == 0:
                self._log_eval_average(results, self.configs_to_run)

            if checkpoint_every and (idx+1) % checkpoint_every == 0:
                # append results since last checkpoint
                last_benchmark_result = BenchmarkResult(result_dict=last_results, file_name = last_result_file, \
                         benchmark_configs=last_run_configs, histories=last_histories)
                last_benchmark_result.dump(last_result_file, dump_configs=True, dump_histories=maintain_history, append=False)
                checkpoint_result.extend(benchmark_result=last_benchmark_result, file_level=True)
                self.logger.info("Extended checkpoint {} with last result.".format(checkpoint_file))
                last_histories.clear()
                last_run_configs.clear()
                last_results.clear()
        # append results of last run
        last_benchmark_result = BenchmarkResult(result_dict=last_results, file_name = last_result_file, \
                         benchmark_configs=last_run_configs, histories=last_histories)
        last_benchmark_result.dump(last_result_file, dump_configs=True, dump_histories=maintain_history, append=False)
        checkpoint_result.extend(benchmark_result=last_benchmark_result, file_level=True)
        os.remove(last_result_file)
        self.logger.info("Extended checkpoint {} with final result.".format(checkpoint_file))
        checkpoint_result.extend(benchmark_result=self.existing_benchmark_result, file_level=True)
        return checkpoint_result
Example 23
    def test_extend_from_file(self):
        try:
          os.remove("./br1")
          os.remove("./br2")
          os.remove("./br3")
        except:
          pass
        result_num = 100
        confs, result_data, histories1 = random_benchmark_conf_data(result_num, 2000000, hist_size=1500000, offset=0)
        br1 = BenchmarkResult(result_dict=result_data,
          benchmark_configs=confs, histories=histories1)
        br1.dump("./br1", dump_histories=True, dump_configs=True)
        br1_df = br1.get_data_frame().copy()

        result_num = 30
        confs2, result_data2, histories2 = random_benchmark_conf_data(result_num, 2000000, hist_size=1500000, offset=200)
        br2 = BenchmarkResult(result_dict=result_data2,
          benchmark_configs=confs2, histories=histories2)
        br2.dump(filename="./br2", dump_histories=True, dump_configs=True)

        result_num = 10
        confs3, result_data3, histories3 = random_benchmark_conf_data(result_num, 2000000, hist_size=1500000, offset=400)
        br3 = BenchmarkResult(result_dict=result_data3,
          benchmark_configs=confs3, histories=histories3)
        br3.dump(filename="./br3", dump_histories=True, dump_configs=True)

        br1.extend(benchmark_result=br2, file_level=True)
        br1.extend(benchmark_result=br3, file_level=True)

        br_loaded = BenchmarkResult.load("./br1", load_histories=True, load_configs=True)
        df_desired = br1_df
        df_desired = pd.concat([df_desired, br2.get_data_frame()])
        df_desired = pd.concat([df_desired, br3.get_data_frame()])
        self.assertEqual(len(br_loaded.get_data_frame().index), len(df_desired.index))

        extended_confs = br_loaded.get_benchmark_configs()
        self.assertEqual(len(extended_confs), 140)
        extended_histories = br_loaded.get_histories()
        self.assertEqual(len(extended_histories), 140)
        extended_histories = histories1
        extended_histories.update(histories2)
        extended_histories.update(histories3)
        for bc in extended_confs:
            self.assertEqual(br_loaded.get_history(bc.config_idx), extended_histories[bc.config_idx])