Example #1
0
    def run(self, viewer=None, maintain_history=False, checkpoint_every=None):
        """Execute all pending benchmark configs one after another.

        Args:
            viewer: Forwarded to ``_run_benchmark_config`` for every config.
            maintain_history: Forwarded to ``_run_benchmark_config``; also
                controls whether histories are written into checkpoints.
            checkpoint_every: If truthy, an intermediate result is dumped into
                the checkpoint directory after every ``checkpoint_every``
                processed configs.

        Returns:
            ``self.existing_benchmark_result`` after it has been extended with
            the results produced by this call.
        """
        results = []
        histories = {}
        for idx, bmark_conf in enumerate(self.configs_to_run):
            processed = idx + 1
            message = "Running config idx {} being {}/{}: Scenario {} of set \"{}\" for behavior \"{}\"".format(
                bmark_conf.config_idx,
                idx,
                len(self.benchmark_configs) - 1,
                bmark_conf.scenario_idx,
                bmark_conf.scenario_set_name,
                bmark_conf.behavior_config.behavior_name)
            self.logger.info(message)

            # Optionally work on a private copy of the config.
            if self._deepcopy:
                bmark_conf = copy.deepcopy(bmark_conf)

            result_dict, scenario_history = self._run_benchmark_config(
                bmark_conf, viewer, maintain_history)
            results.append(result_dict)
            histories[bmark_conf.config_idx] = scenario_history

            # Periodic logging of the running evaluation average.
            if self.log_eval_avg_every and processed % self.log_eval_avg_every == 0:
                self._log_eval_average(results, self.configs_to_run)

            # Nothing more to do unless a checkpoint is due at this position.
            if not checkpoint_every or processed % checkpoint_every != 0:
                continue

            intermediate_result = BenchmarkResult(
                results, self.configs_to_run[:processed], histories=histories)
            checkpoint_file = os.path.join(self.checkpoint_dir,
                                           self.get_checkpoint_file_name())
            intermediate_result.dump(checkpoint_file,
                                     dump_configs=True,
                                     dump_histories=maintain_history)
            self.logger.info("Saved checkpoint {}".format(checkpoint_file))

        final_result = BenchmarkResult(results,
                                       self.configs_to_run,
                                       histories=histories)
        self.existing_benchmark_result.extend(final_result)
        return self.existing_benchmark_result
Example #2
0
 def test_dump_and_load_results(self):
     """Result dicts must survive a dump/load round trip unchanged."""
     expected = random_result_data(size=10)
     BenchmarkResult(result_dict=expected).dump("./results")
     restored = BenchmarkResult.load("./results")
     restored_dict = restored.get_result_dict()
     self.assertEqual(expected, restored_dict)
Example #3
0
    def merge_checkpoint_benchmark_results(checkpoint_dir):
        """Merge every ``*.ckpnt`` file found below ``checkpoint_dir``.

        All checkpoint files are loaded (including configs and histories) and
        folded into one ``BenchmarkResult``. If the merged result contains any
        results it is dumped to ``merged_results.ckpnt`` inside
        ``checkpoint_dir``. Afterwards every globbed checkpoint file except
        the merged one is removed from disk.

        Args:
            checkpoint_dir: Directory that is searched recursively for
                checkpoint files.

        Returns:
            The merged ``BenchmarkResult`` (empty if nothing was found).
        """
        checkpoint_files = glob.glob(os.path.join(checkpoint_dir,
                                                  "**/*.ckpnt"),
                                     recursive=True)
        # Computed up front: the cleanup loop below compares against this path
        # even when nothing gets dumped. Previously the name was only bound
        # inside the dump branch, causing a NameError for empty merges.
        merged_result_filename = os.path.join(checkpoint_dir,
                                              "merged_results.ckpnt")
        merged_result = BenchmarkResult()
        # merge all checkpoints with new results
        for checkpoint_file in checkpoint_files:
            checkpoint_path = os.path.abspath(checkpoint_file)
            logging.info("Loading checkpoint {}".format(checkpoint_path))
            next_result = BenchmarkResult.load(checkpoint_path,
                                               load_configs=True,
                                               load_histories=True)
            merged_result.extend(next_result)
        # dump merged result
        if len(merged_result.get_result_dict()) > 0:
            logging.info("Dumping merged result")
            merged_result.dump(merged_result_filename,
                               dump_configs=True,
                               dump_histories=True)

        # delete checkpoints, but never the merged result file itself
        for checkpoint_file in checkpoint_files:
            if checkpoint_file == merged_result_filename:
                continue
            os.remove(checkpoint_file)
            logging.info(
                "Removed old checkpoint file {}".format(checkpoint_file))
        return merged_result
Example #4
0
    def get_configs_to_run(benchmark_configs, existing_benchmark_result):
        """Return the configs that have no entry in an existing result yet.

        Args:
            benchmark_configs: All benchmark configs that are required.
            existing_benchmark_result: Result object whose
                ``get_benchmark_config_indices()`` lists the config indices
                that were already processed.

        Returns:
            A list with the configs from ``benchmark_configs`` (original order
            preserved) whose ``config_idx`` is not among the existing indices.
        """
        existing_inds = set(
            existing_benchmark_result.get_benchmark_config_indices())
        required_inds = BenchmarkResult(
            benchmark_configs=benchmark_configs).get_benchmark_config_indices()
        # Keep this a set: membership is tested once per config below.
        missing_inds = set(required_inds) - existing_inds

        return [bc for bc in benchmark_configs if bc.config_idx in missing_inds]
Example #5
0
def dummy_benchmark_results():
    """Build a small, fixed ``BenchmarkResult`` for use in tests.

    The result holds three hand-written result rows and a single benchmark
    config.
    """
    columns = ("config_idx", "collision", "rss", "behavior", "scen_idx")
    rows = (
        (24, False, True, "test1", 12),
        (1, True, False, "test", 10),
        (500, True, True, "test", 112),
    )
    # One dict per row, keys in the order given by ``columns``.
    result_rows = [dict(zip(columns, row)) for row in rows]

    behavior_config = BehaviorConfig("behavior_name", "behavior")
    benchmark_config = BenchmarkConfig(0, behavior_config, "test scenario", 0,
                                       "scenario_set_name")
    return BenchmarkResult(result_dict=result_rows,
                           benchmark_configs=[benchmark_config])
Example #6
0
    def test_dump_and_partial_load(self):
        """Dump a result split over several files and load subsets of it."""
        num_results = 100
        confs, result_data, histories = random_benchmark_conf_data(
            num_results, 2000000, hist_size=1500000)
        original = BenchmarkResult(result_dict=result_data,
                                   benchmark_configs=confs,
                                   histories=histories)
        original.dump("./results_all",
                      dump_configs=True,
                      dump_histories=True,
                      max_mb_per_file=5)
        restored = BenchmarkResult.load("./results_all")
        restored_dict = restored.get_result_dict()
        self.assertEqual(original.get_result_dict(), restored_dict)

        # --- partial load of benchmark configs ---
        wanted_configs = list(range(10, 20))
        config_files = restored.load_benchmark_configs(
            config_idx_list=wanted_configs)
        restored_confs = restored.get_benchmark_configs()
        self.assertEqual(len(restored_confs), 10)
        # 2mb per conf, max 5 mb per file -> 2 confs per file -> 10/2 = 5 files
        self.assertEqual(len(config_files), 5)
        for conf_idx in wanted_configs:
            self.assertEqual(restored.get_benchmark_config(conf_idx),
                             confs[conf_idx])

        # --- partial load of histories ---
        wanted_histories = list(range(10, 27))
        history_files = restored.load_histories(
            config_idx_list=wanted_histories)
        restored_histories = restored.get_histories()
        # one more than requested since it was stored in the last file
        self.assertEqual(len(restored_histories), 18)
        # 1.5mb per history, max 5 mb per file -> 3 per file -> 17/3 = 6 files
        self.assertEqual(len(history_files), 6)
        for conf_idx in wanted_histories:
            self.assertEqual(restored.get_history(conf_idx),
                             histories[conf_idx])
Example #7
0
 def run_benchmark_config(self, config_idx, **kwargs):
     """Run the single benchmark config that has the given ``config_idx``.

     Args:
         config_idx: Index of the config to look up in ``self.benchmark_configs``.
         **kwargs: Forwarded to ``_run_benchmark_config``.

     Returns:
         A ``BenchmarkResult`` for the first matching config, or ``None``
         (after logging an error) when no config has that index.
     """
     selected = next(
         (conf for conf in self.benchmark_configs if conf.config_idx == config_idx),
         None)
     if selected is None:
         self.logger.error("Config idx {} not found in benchmark configs. Skipping...".format(config_idx))
         return

     # The run operates on a deep copy; the result references the original config.
     result_dict, scenario_history = self._run_benchmark_config(copy.deepcopy(selected), **kwargs)
     return BenchmarkResult(result_dict, [selected], histories={config_idx: scenario_history})
Example #8
0
    def _log_eval_average(self, result_dct_list, configs):
        bresult = BenchmarkResult(result_dct_list, configs)
        df = bresult.get_data_frame()
        grouped = df.apply(pd.to_numeric, errors='ignore').groupby(bresult.get_evaluation_groups()).mean()[
            self._evaluation_criteria()]
        self.logger.info("\n------------------- Current Evaluation Results ---------------------- \n Num. Results:{}\n {} \n \
---------------------------------------------------------------------".format(len(result_dct_list),
                                                                              grouped.to_string()))
Example #9
0
    def test_extend_from_file(self):
        """Extend a dumped result on file level and verify the merged content.

        Three results with disjoint config index offsets (0, 200, 400) are
        dumped; the first one is then extended with the other two using
        ``file_level=True`` and reloaded from disk for comparison.
        """
        # Best-effort cleanup of leftovers from earlier runs. Each file is
        # handled on its own so that a missing "./br1" does not prevent the
        # removal of "./br2" and "./br3".
        for stale_file in ("./br1", "./br2", "./br3"):
            try:
                os.remove(stale_file)
            except OSError:
                pass

        result_num = 100
        confs, result_data, histories1 = random_benchmark_conf_data(result_num, 2000000, hist_size=1500000, offset=0)
        br1 = BenchmarkResult(result_dict=result_data,
                              benchmark_configs=confs, histories=histories1)
        br1.dump("./br1", dump_histories=True, dump_configs=True)
        # Snapshot of the data frame before br1 gets extended.
        br1_df = br1.get_data_frame().copy()

        result_num = 30
        confs2, result_data2, histories2 = random_benchmark_conf_data(result_num, 2000000, hist_size=1500000, offset=200)
        br2 = BenchmarkResult(result_dict=result_data2,
                              benchmark_configs=confs2, histories=histories2)
        br2.dump(filename="./br2", dump_histories=True, dump_configs=True)

        result_num = 10
        confs3, result_data3, histories3 = random_benchmark_conf_data(result_num, 2000000, hist_size=1500000, offset=400)
        br3 = BenchmarkResult(result_dict=result_data3,
                              benchmark_configs=confs3, histories=histories3)
        br3.dump(filename="./br3", dump_histories=True, dump_configs=True)

        br1.extend(benchmark_result=br2, file_level=True)
        br1.extend(benchmark_result=br3, file_level=True)

        br_loaded = BenchmarkResult.load("./br1", load_histories=True, load_configs=True)
        df_desired = br1_df
        df_desired = pd.concat([df_desired, br2.get_data_frame()])
        df_desired = pd.concat([df_desired, br3.get_data_frame()])
        self.assertEqual(len(br_loaded.get_data_frame().index), len(df_desired.index))

        extended_confs = br_loaded.get_benchmark_configs()
        self.assertEqual(len(extended_confs), 140)
        extended_histories = br_loaded.get_histories()
        self.assertEqual(len(extended_histories), 140)
        # Build the expectation in a fresh dict instead of mutating histories1,
        # which was handed to br1 above.
        expected_histories = {**histories1, **histories2, **histories3}
        for bc in extended_confs:
            self.assertEqual(br_loaded.get_history(bc.config_idx), expected_histories[bc.config_idx])
Example #10
0
    def __init__(self,
                 benchmark_database=None,
                 evaluators=None,
                 terminal_when=None,
                 behaviors=None,
                 behavior_configs=None,
                 num_scenarios=None,
                 benchmark_configs=None,
                 scenario_generation=None,
                 logger_name=None,
                 log_eval_avg_every=None,
                 checkpoint_dir=None,
                 merge_existing=False,
                 deepcopy=True):
        """Set up configs, logger and checkpoint state of the runner.

        Args:
            benchmark_database: Used to create the benchmark configs when no
                explicit ``benchmark_configs`` are given.
            evaluators: Either an ``EvaluationConfig`` or a value that is
                wrapped into one.
            terminal_when: Stored as-is; defaults to an empty list.
            behaviors: If truthy, converted into behavior configs with
                ``BehaviorConfig.configs_from_dict``.
            behavior_configs: Used when ``behaviors`` is falsy; defaults to
                an empty dict.
            num_scenarios: Forwarded to the config creation helpers.
            benchmark_configs: Explicit configs; take precedence over the
                database and the scenario generation.
            scenario_generation: Used to create the benchmark configs when
                neither explicit configs nor a database are given.
            logger_name: Name of the logger; defaults to "BenchmarkRunner".
            log_eval_avg_every: Stored on the instance for use during runs.
            checkpoint_dir: Checkpoint directory; defaults to "checkpoints"
                and is created when it does not exist.
            merge_existing: If true, existing checkpoints in the checkpoint
                directory are merged and only configs without a result
                remain in ``self.configs_to_run``.
            deepcopy: Stored as ``self._deepcopy``.
        """
        self.benchmark_database = benchmark_database
        self.scenario_generation = scenario_generation
        self.evaluators = evaluators if isinstance(evaluators, EvaluationConfig) else EvaluationConfig(evaluators)
        self.terminal_when = terminal_when or []
        if behaviors:
            self.behavior_configs = BehaviorConfig.configs_from_dict(behaviors)
        else:
            self.behavior_configs = behavior_configs or {}
        if benchmark_configs:
            self.benchmark_configs = benchmark_configs
        elif benchmark_database:
            self.benchmark_configs = \
                self._create_configurations_from_database(num_scenarios)
        elif scenario_generation:
            self.benchmark_configs = \
                self._create_configurations_from_scenario_generation(num_scenarios)
        else:
            self.benchmark_configs = []  # to be compatible when benchmark runner is used in ray actor

        self.logger = logging.getLogger(logger_name or "BenchmarkRunner")
        self.logger.setLevel(logging.DEBUG)
        self.logger.info("Total number of {} configs to run".format(len(self.benchmark_configs)))
        self.configs_to_run = self.benchmark_configs
        self._deepcopy = deepcopy
        self.checkpoint_dir = checkpoint_dir or "checkpoints"
        self.existing_benchmark_result = BenchmarkResult(file_name= \
                    BenchmarkRunner.get_merged_result_filename(self.checkpoint_dir))
        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)

        if merge_existing:
            # Use the defaulted self.checkpoint_dir here: the raw
            # ``checkpoint_dir`` argument may be None, which would break the
            # path handling inside the merge.
            self.existing_benchmark_result = \
                BenchmarkRunner.merge_checkpoint_benchmark_results(self.checkpoint_dir)
            self.logger.info("Merged {} processed configs in folder {}". \
                format(len(self.existing_benchmark_result.get_benchmark_configs()), self.checkpoint_dir))
            self.configs_to_run = self.get_configs_to_run(self.benchmark_configs, \
                                                            self.existing_benchmark_result)
            self.logger.info("Remaining  number of {} configs to run".format(len(self.configs_to_run)))

        self.exceptions_caught = []
        self.log_eval_avg_every = log_eval_avg_every
    def test_make_scenarios_congruent(self):
        """Check the first list returned by ``make_scenarios_congruent``."""
        benchmark_result = BenchmarkResult(
            result_dict=dummy_benchmark_results(), benchmark_configs=None)
        analyzer = BenchmarkAnalyzer(benchmark_result=benchmark_result)

        # Three config index lists that are to be made congruent with
        # respect to the scenarios they cover.
        idx_lists = [[3, 2, 500, 11], [35, 1, 41], [500, 41, 12, 121]]
        congruent = analyzer.make_scenarios_congruent(configs_idx_lists=idx_lists)
        self.assertEqual(congruent[0], [11, 2])
Example #12
0
 def test_dump_and_load_benchmark_configs(self):
     """Configs dumped over several files must load back unchanged."""
     num_results = 100
     confs, result_data, _ = random_benchmark_conf_data(num_results, 2000000)
     original = BenchmarkResult(result_dict=result_data, benchmark_configs=confs)
     original.dump("./results_with_confs", dump_configs=True, max_mb_per_file=5)

     restored = BenchmarkResult.load("./results_with_confs")
     # Configs are loaded explicitly for all indices after the initial load.
     all_indices = list(range(0, num_results))
     restored.load_benchmark_configs(config_idx_list=all_indices)

     restored_confs = restored.get_benchmark_configs()
     self.assertEqual(confs, restored_confs)
     restored_dict = restored.get_result_dict()
     self.assertEqual(original.get_result_dict(), restored_dict)
Example #13
0
    def _log_eval_average(self, result_dct_list, configs):
        bresult = BenchmarkResult(result_dct_list, configs)
        df = bresult.get_data_frame()
        for eval_group in bresult.get_evaluation_groups():
          if eval_group not in df.columns:
            df[eval_group] = np.nan
        df.fillna(-1, inplace=True)
        grouped = df.apply(pd.to_numeric, errors='ignore').groupby(bresult.get_evaluation_groups()).mean()
        self.logger.info("\n------------------- Current Evaluation Results ---------------------- \n Num. Results:{}\n {} \n \
---------------------------------------------------------------------".format(len(result_dct_list),
                                                                              grouped.to_string()))
Example #14
0
    def run(self, viewer=None, maintain_history=False, checkpoint_every=None):
        """Run all pending configs, appending results to a checkpoint on file level.

        Results produced since the last checkpoint are buffered, dumped to a
        temporary file and then merged into the checkpoint result with
        ``extend(..., file_level=True)``.

        Args:
            viewer: Forwarded to ``_run_benchmark_config`` for every config.
            maintain_history: Forwarded to ``_run_benchmark_config``; also
                controls whether histories are dumped.
            checkpoint_every: If truthy, the buffered results are flushed to
                the checkpoint after every ``checkpoint_every`` configs.

        Returns:
            The checkpoint ``BenchmarkResult``, extended with the results of
            this run and with ``self.existing_benchmark_result``.
        """
        # Buffers holding only what was produced since the last checkpoint.
        last_results = []
        last_histories = {}
        last_run_configs = []
        # All results of this run; only used for the periodic average logging.
        results = []
        checkpoint_file = os.path.abspath(os.path.join(self.checkpoint_dir, self.get_checkpoint_file_name()))
        # Temporary file the buffered results are dumped to before merging.
        last_result_file = os.path.abspath(os.path.join(self.checkpoint_dir, "tmp_{}".format(self.get_checkpoint_file_name())))
        checkpoint_result = BenchmarkResult(file_name=checkpoint_file)
        for idx, bmark_conf in enumerate(self.configs_to_run):
            self.logger.info("Running config idx {} being {}/{}: Scenario {} of set \"{}\" for behavior \"{}\"".format(
                bmark_conf.config_idx, idx, len(self.benchmark_configs) - 1, bmark_conf.scenario_idx,
                bmark_conf.scenario_set_name, bmark_conf.behavior_config.behavior_name))
            bmark_conf = copy.deepcopy(bmark_conf) if self._deepcopy else bmark_conf
            result_dict, scenario_history = self._run_benchmark_config(bmark_conf, viewer,
                                                                       maintain_history)
            results.append(result_dict)
            last_results.append(result_dict)
            last_histories[bmark_conf.config_idx] = scenario_history
            last_run_configs.append(bmark_conf)
            if self.log_eval_avg_every and (idx + 1) % self.log_eval_avg_every == 0:
                self._log_eval_average(results, self.configs_to_run)

            if checkpoint_every and (idx+1) % checkpoint_every == 0:
                # append results since last checkpoint
                last_benchmark_result = BenchmarkResult(result_dict=last_results, file_name = last_result_file, \
                         benchmark_configs=last_run_configs, histories=last_histories)
                last_benchmark_result.dump(last_result_file, dump_configs=True, dump_histories=maintain_history, append=False)
                checkpoint_result.extend(benchmark_result=last_benchmark_result, file_level=True)
                self.logger.info("Extended checkpoint {} with last result.".format(checkpoint_file))
                # Reset the buffers in place so the next checkpoint starts empty.
                last_histories.clear()
                last_run_configs.clear()
                last_results.clear()
        # append results of last run
        # NOTE(review): this also runs when the buffers are empty (e.g. right
        # after a checkpoint) - presumably dump/extend cope with that; confirm.
        last_benchmark_result = BenchmarkResult(result_dict=last_results, file_name = last_result_file, \
                         benchmark_configs=last_run_configs, histories=last_histories)
        last_benchmark_result.dump(last_result_file, dump_configs=True, dump_histories=maintain_history, append=False)
        checkpoint_result.extend(benchmark_result=last_benchmark_result, file_level=True)
        # The temporary file is no longer needed once it was merged.
        os.remove(last_result_file)
        self.logger.info("Extended checkpoint {} with final result.".format(checkpoint_file))
        # Finally fold in the results that existed before this run.
        checkpoint_result.extend(benchmark_result=self.existing_benchmark_result, file_level=True)
        return checkpoint_result
Example #15
0
 def test_dump_and_load_histories_one(self):
     """Histories and results must survive a dump/load round trip."""
     num_results = 2
     result_data = random_result_data(size=num_results)
     histories = random_history_data(num_results, 20)
     original = BenchmarkResult(result_dict=result_data, histories=histories)
     original.dump("./results_with_history", dump_histories=True, max_mb_per_file=2)

     restored = BenchmarkResult.load("./results_with_history")
     # Histories are loaded explicitly for all stored config indices.
     restored.load_histories(config_idx_list=list(histories.keys()))

     restored_histories = restored.get_histories()
     self.assertEqual(histories, restored_histories)
     restored_dict = restored.get_result_dict()
     self.assertEqual(result_data, restored_dict)
    def test_find_config_idx(self):
        """Exercise ``BenchmarkAnalyzer.find_configs`` with various filters."""
        benchmark_result = BenchmarkResult(
            result_dict=dummy_benchmark_results(), benchmark_configs=None)
        analyzer = BenchmarkAnalyzer(benchmark_result=benchmark_result)

        # Each case: (positional args, keyword args, expected config indices).
        # Cases are evaluated in the listed order.
        cases = [
            (({"collision": lambda x: x},), {}, [1, 500, 11]),
            (({"collision": lambda x: not x},), {},
             [24, 2, 41, 3, 12, 121, 35, 42]),
            (({"metric1": lambda x: x == 0.1},), {}, [24, 11]),
            (({"collision": lambda x: not x,
               "metric1": lambda x: x == 0.1},), {}, [24]),
            (({"collision": lambda x: x,
               "metric1": lambda x: x > 1},), {}, [1, 500]),
            (({"collision": lambda x: x,
               "behavior": lambda x: x == "test2"},), {}, []),
            ((), {"scenario_idx_list": [4, 10, 7]}, [121, 1, 42]),
            (({"collision": lambda x: not x},),
             {"scenario_idx_list": [4, 10, 7]}, [121, 42]),
            (({"collision": lambda x: x},),
             {"scenarios_as_in_configs": [24, 11, 121, 1]}, [11, 1]),
            ((), {"scenario_idx_list": [4, 10, 7], "in_configs": [1, 42]},
             [1, 42]),
        ]
        for args, kwargs, expected in cases:
            configs_found = analyzer.find_configs(*args, **kwargs)
            self.assertEqual(configs_found, expected)
Example #17
0
 def run(self, viewer=None, maintain_history=False, checkpoint_every=None):
     """Run the benchmark on all actors and combine their results.

     Args:
         viewer: Forwarded to every actor's ``run``.
         maintain_history: Forwarded to every actor's ``run``.
         checkpoint_every: Forwarded to every actor's ``run``.

     Returns:
         ``self.existing_benchmark_result`` after it has been extended with
         the combined results of all actors.
     """
     pending = [
         actor.run.remote(viewer, maintain_history, checkpoint_every)
         for actor in self.actors
     ]
     actor_results = ray.get(pending)

     # Collect result dicts, configs and histories of all actors.
     combined_dicts, combined_configs, combined_histories = [], [], {}
     for partial in actor_results:
         combined_dicts.extend(partial.get_result_dict())
         combined_configs.extend(partial.get_benchmark_configs())
         combined_histories.update(partial.get_histories())

     combined = BenchmarkResult(combined_dicts,
                                combined_configs,
                                histories=combined_histories)
     self.existing_benchmark_result.extend(combined)
     return self.existing_benchmark_result
Example #18
0
    def __init__(self,
                 benchmark_database=None,
                 evaluators=None,
                 terminal_when=None,
                 behaviors=None,
                 behavior_configs=None,
                 num_scenarios=None,
                 benchmark_configs=None,
                 logger_name=None,
                 log_eval_avg_every=None,
                 checkpoint_dir=None,
                 merge_existing=False):
        """Set up configs, logger and checkpoint state of the runner.

        Args:
            benchmark_database: Stored on the instance.
            evaluators: Stored as-is; defaults to an empty dict.
            terminal_when: Stored as-is; defaults to an empty list.
            behaviors: If truthy, converted into behavior configs with
                ``BehaviorConfig.configs_from_dict``.
            behavior_configs: Used when ``behaviors`` is falsy; defaults to
                an empty dict.
            num_scenarios: Forwarded to ``_create_configurations`` when no
                explicit ``benchmark_configs`` are given.
            benchmark_configs: Explicit configs to run.
            logger_name: Name of the logger; defaults to "BenchmarkRunner".
            log_eval_avg_every: Stored on the instance for use during runs.
            checkpoint_dir: Checkpoint directory; defaults to "checkpoints"
                and is created when it does not exist.
            merge_existing: If true, existing checkpoints in the checkpoint
                directory are merged and only configs without a result
                remain in ``self.configs_to_run``.
        """
        self.benchmark_database = benchmark_database
        self.evaluators = evaluators or {}
        self.terminal_when = terminal_when or []
        if behaviors:
            self.behavior_configs = BehaviorConfig.configs_from_dict(behaviors)
        else:
            self.behavior_configs = behavior_configs or {}
        self.benchmark_configs = benchmark_configs or \
                                 self._create_configurations(num_scenarios)

        self.logger = logging.getLogger(logger_name or "BenchmarkRunner")
        self.logger.setLevel(logging.DEBUG)
        self.logger.info("Total number of {} configs to run".format(len(self.benchmark_configs)))
        self.existing_benchmark_result = BenchmarkResult()
        self.configs_to_run = self.benchmark_configs

        self.checkpoint_dir = checkpoint_dir or "checkpoints"
        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)

        if merge_existing:
            # Use the defaulted self.checkpoint_dir here: the raw
            # ``checkpoint_dir`` argument may be None, which would break the
            # path handling inside the merge.
            self.existing_benchmark_result = \
                BenchmarkRunner.merge_checkpoint_benchmark_results(self.checkpoint_dir)
            self.logger.info("Merged {} processed configs in folder {}". \
                format(len(self.existing_benchmark_result.get_benchmark_configs()), self.checkpoint_dir))
            self.configs_to_run = self.get_configs_to_run(self.benchmark_configs, \
                                                            self.existing_benchmark_result)
            self.logger.info("Remaining  number of {} configs to run".format(len(self.configs_to_run)))

        self.exceptions_caught = []
        self.log_eval_avg_every = log_eval_avg_every
Example #19
0
    def merge_checkpoint_benchmark_results(checkpoint_dir):
        """Fold all ``*.ckpnt`` files below ``checkpoint_dir`` into the merged result.

        An existing merged result file is loaded as the starting point,
        otherwise a new ``BenchmarkResult`` bound to that file name is
        created. Every globbed checkpoint is merged on file level, and all
        checkpoint files whose path does not contain "merged_result" are
        removed afterwards.

        Args:
            checkpoint_dir: Directory that is searched recursively for
                checkpoint files.

        Returns:
            The merged ``BenchmarkResult``.
        """
        pattern = os.path.join(checkpoint_dir, "**/*.ckpnt")
        checkpoint_files = glob.glob(pattern, recursive=True)
        merged_path = BenchmarkRunner.get_merged_result_filename(checkpoint_dir)
        if not os.path.exists(merged_path):
            merged = BenchmarkResult(file_name=merged_path)
        else:
            merged = BenchmarkResult.load_results(filename=merged_path)

        # merge all checkpoints with new results
        for path in checkpoint_files:
            merged.extend(BenchmarkResult.load(os.path.abspath(path)), file_level=True)
            logging.info("Extending with checkpoint {}".format(path))

        # delete checkpoints
        # NOTE(review): the substring test applies to the whole path, so a
        # directory name containing "merged_result" would also protect files.
        for path in checkpoint_files:
            if "merged_result" not in path:
                os.remove(path)
                logging.info("Removed old checkpoint file {}".format(path))
        return merged