def test_dump_and_load_results(self):
    result_data = random_result_data(size=10)
    br = BenchmarkResult(result_dict=result_data)
    br.dump("./results")
    br_loaded = BenchmarkResult.load("./results")
    loaded_dict = br_loaded.get_result_dict()
    self.assertEqual(result_data, loaded_dict)
def run(self, viewer=None, maintain_history=False, checkpoint_every=None):
    results = []
    histories = {}
    for idx, bmark_conf in enumerate(self.configs_to_run):
        self.logger.info(
            "Running config idx {} ({}/{}): Scenario {} of set \"{}\" for behavior \"{}\"".format(
                bmark_conf.config_idx, idx, len(self.benchmark_configs) - 1,
                bmark_conf.scenario_idx, bmark_conf.scenario_set_name,
                bmark_conf.behavior_config.behavior_name))
        bmark_conf = copy.deepcopy(bmark_conf) if self._deepcopy else bmark_conf
        result_dict, scenario_history = self._run_benchmark_config(
            bmark_conf, viewer, maintain_history)
        results.append(result_dict)
        histories[bmark_conf.config_idx] = scenario_history
        if self.log_eval_avg_every and (idx + 1) % self.log_eval_avg_every == 0:
            self._log_eval_average(results, self.configs_to_run)
        if checkpoint_every and (idx + 1) % checkpoint_every == 0:
            intermediate_result = BenchmarkResult(
                results, self.configs_to_run[0:idx + 1], histories=histories)
            checkpoint_file = os.path.join(self.checkpoint_dir,
                                           self.get_checkpoint_file_name())
            intermediate_result.dump(checkpoint_file, dump_configs=True,
                                     dump_histories=maintain_history)
            self.logger.info("Saved checkpoint {}".format(checkpoint_file))
    benchmark_result = BenchmarkResult(results, self.configs_to_run,
                                       histories=histories)
    self.existing_benchmark_result.extend(benchmark_result)
    return self.existing_benchmark_result
def _log_eval_average(self, result_dct_list, configs):
    bresult = BenchmarkResult(result_dct_list, configs)
    df = bresult.get_data_frame()
    grouped = df.apply(pd.to_numeric, errors='ignore').groupby(
        bresult.get_evaluation_groups()).mean()[self._evaluation_criteria()]
    self.logger.info(
        "\n------------------- Current Evaluation Results ----------------------\n"
        " Num. Results: {}\n{}\n"
        "----------------------------------------------------------------------".format(
            len(result_dct_list), grouped.to_string()))
def __init__(self,
             benchmark_database=None,
             evaluators=None,
             terminal_when=None,
             behaviors=None,
             behavior_configs=None,
             num_scenarios=None,
             benchmark_configs=None,
             scenario_generation=None,
             logger_name=None,
             log_eval_avg_every=None,
             checkpoint_dir=None,
             merge_existing=False,
             deepcopy=True):
    self.benchmark_database = benchmark_database
    self.scenario_generation = scenario_generation
    self.evaluators = evaluators if isinstance(evaluators, EvaluationConfig) \
        else EvaluationConfig(evaluators)
    self.terminal_when = terminal_when or []
    if behaviors:
        self.behavior_configs = BehaviorConfig.configs_from_dict(behaviors)
    else:
        self.behavior_configs = behavior_configs or {}
    if benchmark_configs:
        self.benchmark_configs = benchmark_configs
    elif benchmark_database:
        self.benchmark_configs = \
            self._create_configurations_from_database(num_scenarios)
    elif scenario_generation:
        self.benchmark_configs = \
            self._create_configurations_from_scenario_generation(num_scenarios)
    else:
        # stay compatible when the benchmark runner is used inside a ray actor
        self.benchmark_configs = []
    self.logger = logging.getLogger(logger_name or "BenchmarkRunner")
    self.logger.setLevel(logging.DEBUG)
    self.logger.info("Total number of configs to run: {}".format(
        len(self.benchmark_configs)))
    self.configs_to_run = self.benchmark_configs
    self._deepcopy = deepcopy
    self.checkpoint_dir = checkpoint_dir or "checkpoints"
    self.existing_benchmark_result = BenchmarkResult(
        file_name=BenchmarkRunner.get_merged_result_filename(self.checkpoint_dir))
    if not os.path.exists(self.checkpoint_dir):
        os.makedirs(self.checkpoint_dir)
    if merge_existing:
        self.existing_benchmark_result = \
            BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir)
        self.logger.info("Merged {} processed configs in folder {}".format(
            len(self.existing_benchmark_result.get_benchmark_configs()),
            checkpoint_dir))
        self.configs_to_run = self.get_configs_to_run(
            self.benchmark_configs, self.existing_benchmark_result)
        self.logger.info("Remaining number of configs to run: {}".format(
            len(self.configs_to_run)))
    self.exceptions_caught = []
    self.log_eval_avg_every = log_eval_avg_every
def _log_eval_average(self, result_dct_list, configs):
    bresult = BenchmarkResult(result_dct_list, configs)
    df = bresult.get_data_frame()
    for eval_group in bresult.get_evaluation_groups():
        if eval_group not in df.columns:
            df[eval_group] = np.nan
    df.fillna(-1, inplace=True)
    grouped = df.apply(pd.to_numeric, errors='ignore').groupby(
        bresult.get_evaluation_groups()).mean()
    self.logger.info(
        "\n------------------- Current Evaluation Results ----------------------\n"
        " Num. Results: {}\n{}\n"
        "----------------------------------------------------------------------".format(
            len(result_dct_list), grouped.to_string()))
def test_dump_and_load_histories_one(self):
    result_num = 2
    result_data = random_result_data(size=result_num)
    histories = random_history_data(result_num, 20)
    br = BenchmarkResult(result_dict=result_data, histories=histories)
    br.dump("./results_with_history", dump_histories=True, max_mb_per_file=2)
    br_loaded = BenchmarkResult.load("./results_with_history")
    br_loaded.load_histories(config_idx_list=list(histories.keys()))
    loaded_histories = br_loaded.get_histories()
    self.assertEqual(histories, loaded_histories)
    loaded_dict = br_loaded.get_result_dict()
    self.assertEqual(result_data, loaded_dict)
def merge_checkpoint_benchmark_results(checkpoint_dir): checkpoint_files = glob.glob(os.path.join(checkpoint_dir, "**/*.ckpnt"), recursive=True) merged_result = BenchmarkResult() # merge all checkpoints with new results for checkpoint_file in checkpoint_files: logging.info("Loading checkpoint {}".format( os.path.abspath(checkpoint_file))) next_result = BenchmarkResult.load(os.path.abspath(checkpoint_file), \ load_configs=True, load_histories=True) merged_result.extend(next_result) # dump merged result if len(merged_result.get_result_dict()) > 0: logging.info("Dumping merged result") merged_result_filename = os.path.join(checkpoint_dir, "merged_results.ckpnt") merged_result.dump(merged_result_filename, \ dump_configs=True, dump_histories=True) # delete checkpoints for checkpoint_file in checkpoint_files: if checkpoint_file == merged_result_filename: continue os.remove(checkpoint_file) logging.info( "Removed old checkpoint file {}".format(checkpoint_file)) return merged_result
def dump():
    ckpt_dir = [
        os.path.join('/home/ekumar/master_thesis/code/hythe-src/checkpoints/', ck)
        for ck in os.listdir("/home/ekumar/master_thesis/code/hythe-src/checkpoints/")
    ]
    eval_criteria = {"goal_reached": lambda x: x}
    print(ckpt_dir)
    demos_dir = os.path.join('/home/ekumar/demos/')
    list_of_demos = []
    for cdir in ckpt_dir:
        print(f"Extracting result {cdir}")
        result = BenchmarkResult.load_results(cdir)
        democ = DemonstrationCollector()
        democ._collection_result = result
        democ._directory = demos_dir
        demos = democ.ProcessCollectionResult(eval_criteria)
        list_of_demos.extend(demos)
    # make the demonstrations dir in the exp root
    os.makedirs(
        "/home/ekumar/output/experiments/exp_c76fc949-e95f-4774-91ba-6bec575ada37/demonstrations/generated_demonstrations"
    )
    to_pickle(
        list_of_demos,
        "/home/ekumar/output/experiments/exp_c76fc949-e95f-4774-91ba-6bec575ada37/demonstrations/generated_demonstrations",
        "demonstrations")
    collector = DemonstrationCollector.load(
        "/home/ekumar/output/experiments/exp_c76fc949-e95f-4774-91ba-6bec575ada37/demonstrations/generated_demonstrations"
    )
    print("Total demonstrations found:", len(collector.GetDemonstrationExperiences()))
    return
def get_configs_to_run(benchmark_configs, existing_benchmark_result):
    existing_inds = existing_benchmark_result.get_benchmark_config_indices()
    required_inds = BenchmarkResult(
        benchmark_configs=benchmark_configs).get_benchmark_config_indices()
    missing_inds = list(set(required_inds) - set(existing_inds))
    filtered_configs = filter(lambda bc: bc.config_idx in missing_inds,
                              benchmark_configs)
    return list(filtered_configs)
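# Hedged usage sketch (not part of the original sources): resuming an interrupted
# benchmark by filtering out configs already covered by merged checkpoints.
# "all_benchmark_configs" is a hypothetical placeholder for the full config list;
# the calls below only use the BenchmarkRunner API shown in this file.
def example_resume_remaining_configs(all_benchmark_configs, checkpoint_dir="checkpoints"):
    # merge whatever per-run checkpoints exist into one BenchmarkResult
    previous_result = BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir)
    # keep only configs whose config_idx is not yet present in the merged result
    remaining = BenchmarkRunner.get_configs_to_run(all_benchmark_configs, previous_result)
    return remaining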
def dummy_benchmark_results():
    br = [
        {"config_idx": 24, "collision": False, "rss": True, "behavior": "test1", "scen_idx": 12},
        {"config_idx": 1, "collision": True, "rss": False, "behavior": "test", "scen_idx": 10},
        {"config_idx": 500, "collision": True, "rss": True, "behavior": "test", "scen_idx": 112},
    ]
    behaviorc = BehaviorConfig("behavior_name", "behavior")
    benchmarkc = BenchmarkConfig(0, behaviorc, "test scenario", 0, "scenario_set_name")
    return BenchmarkResult(result_dict=br, benchmark_configs=[benchmarkc])
def run_benchmark_config(self, config_idx, **kwargs):
    for bmark_conf in self.benchmark_configs:
        if bmark_conf.config_idx == config_idx:
            result_dict, scenario_history = self._run_benchmark_config(
                copy.deepcopy(bmark_conf), **kwargs)
            return BenchmarkResult(result_dict, [bmark_conf],
                                   histories={config_idx: scenario_history})
    self.logger.error("Config idx {} not found in benchmark configs. Skipping...".format(config_idx))
    return
def __init__(self,
             benchmark_database=None,
             evaluators=None,
             terminal_when=None,
             behaviors=None,
             behavior_configs=None,
             num_scenarios=None,
             benchmark_configs=None,
             logger_name=None,
             log_eval_avg_every=None,
             checkpoint_dir=None,
             merge_existing=False):
    self.benchmark_database = benchmark_database
    self.evaluators = evaluators or {}
    self.terminal_when = terminal_when or []
    if behaviors:
        self.behavior_configs = BehaviorConfig.configs_from_dict(behaviors)
    else:
        self.behavior_configs = behavior_configs or {}
    self.benchmark_configs = benchmark_configs or \
        self._create_configurations(num_scenarios)
    self.logger = logging.getLogger(logger_name or "BenchmarkRunner")
    self.logger.setLevel(logging.DEBUG)
    self.logger.info("Total number of configs to run: {}".format(
        len(self.benchmark_configs)))
    self.existing_benchmark_result = BenchmarkResult()
    self.configs_to_run = self.benchmark_configs
    self.checkpoint_dir = checkpoint_dir or "checkpoints"
    if not os.path.exists(self.checkpoint_dir):
        os.makedirs(self.checkpoint_dir)
    if merge_existing:
        self.existing_benchmark_result = \
            BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir)
        self.logger.info("Merged {} processed configs in folder {}".format(
            len(self.existing_benchmark_result.get_benchmark_configs()),
            checkpoint_dir))
        self.configs_to_run = self.get_configs_to_run(
            self.benchmark_configs, self.existing_benchmark_result)
        self.logger.info("Remaining number of configs to run: {}".format(
            len(self.configs_to_run)))
    self.exceptions_caught = []
    self.log_eval_avg_every = log_eval_avg_every
def test_make_scenarios_congruent(self):
    brst = BenchmarkResult(result_dict=dummy_benchmark_results(),
                           benchmark_configs=None)
    analyzer = BenchmarkAnalyzer(benchmark_result=brst)
    # scenarios 5 and 12 in all lists
    congruent_list = analyzer.make_scenarios_congruent(
        configs_idx_lists=[[3, 2, 500, 11], [35, 1, 41], [500, 41, 12, 121]])
    self.assertEqual(congruent_list[0], [11, 2])
def test_dump_and_partial_load(self):
    result_num = 100
    confs, result_data, histories = random_benchmark_conf_data(
        result_num, 2000000, hist_size=1500000)
    br = BenchmarkResult(result_dict=result_data, benchmark_configs=confs,
                         histories=histories)
    br.dump("./results_all", dump_configs=True, dump_histories=True, max_mb_per_file=5)
    br_loaded = BenchmarkResult.load("./results_all")
    loaded_dict = br_loaded.get_result_dict()
    self.assertEqual(br.get_result_dict(), loaded_dict)

    loaded_configs_idx = list(range(10, 20))
    processed_files = br_loaded.load_benchmark_configs(config_idx_list=loaded_configs_idx)
    loaded_confs = br_loaded.get_benchmark_configs()
    self.assertEqual(len(loaded_confs), 10)
    # 2 MB per conf, max 5 MB per file -> 2 confs per file -> 10/2 = 5 files
    self.assertEqual(len(processed_files), 5)
    for conf_idx in loaded_configs_idx:
        self.assertEqual(br_loaded.get_benchmark_config(conf_idx), confs[conf_idx])

    loaded_configs_idx = list(range(10, 27))
    processed_files = br_loaded.load_histories(config_idx_list=loaded_configs_idx)
    loaded_histories = br_loaded.get_histories()
    # one more than specified since it was stored in the last loaded file
    self.assertEqual(len(loaded_histories), 18)
    # 1.5 MB per history, max 5 MB per file -> 3 histories per file -> 17/3 -> 6 files
    self.assertEqual(len(processed_files), 6)
    for conf_idx in loaded_configs_idx:
        self.assertEqual(br_loaded.get_history(conf_idx), histories[conf_idx])
def load(directory):
    collector = DemonstrationCollector()
    collection_result_fullname = os.path.join(
        directory, DemonstrationCollector.collection_result_filename())
    if os.path.exists(collection_result_fullname):
        collector._collection_result = BenchmarkResult.load(collection_result_fullname)
    demonstration_fullname = os.path.join(
        directory, DemonstrationCollector.demonstrations_filename())
    if os.path.exists(demonstration_fullname):
        collector._demonstrations = from_pickle(
            directory, DemonstrationCollector.demonstrations_filename())
    collector._directory = directory
    return collector
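# Hedged usage sketch (assumption, not part of the original sources): loading a
# previously dumped collector and inspecting its demonstrations. The directory
# path is a hypothetical placeholder; GetDemonstrationExperiences() is used the
# same way as in the dump() helper above.
def example_inspect_demonstrations(demo_dir="./demonstrations/generated_demonstrations"):
    collector = DemonstrationCollector.load(demo_dir)
    experiences = collector.GetDemonstrationExperiences()
    print("Total demonstrations found:", len(experiences))
    return experiences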
def test_find_config_idx(self):
    brst = BenchmarkResult(result_dict=dummy_benchmark_results(),
                           benchmark_configs=None)
    analyzer = BenchmarkAnalyzer(benchmark_result=brst)

    configs_found = analyzer.find_configs({"collision": lambda x: x})
    self.assertEqual(configs_found, [1, 500, 11])

    configs_found = analyzer.find_configs({"collision": lambda x: not x})
    self.assertEqual(configs_found, [24, 2, 41, 3, 12, 121, 35, 42])

    configs_found = analyzer.find_configs({"metric1": lambda x: x == 0.1})
    self.assertEqual(configs_found, [24, 11])

    configs_found = analyzer.find_configs({
        "collision": lambda x: not x,
        "metric1": lambda x: x == 0.1
    })
    self.assertEqual(configs_found, [24])

    configs_found = analyzer.find_configs({
        "collision": lambda x: x,
        "metric1": lambda x: x > 1
    })
    self.assertEqual(configs_found, [1, 500])

    configs_found = analyzer.find_configs({
        "collision": lambda x: x,
        "behavior": lambda x: x == "test2"
    })
    self.assertEqual(configs_found, [])

    configs_found = analyzer.find_configs(scenario_idx_list=[4, 10, 7])
    self.assertEqual(configs_found, [121, 1, 42])

    configs_found = analyzer.find_configs({"collision": lambda x: not x},
                                          scenario_idx_list=[4, 10, 7])
    self.assertEqual(configs_found, [121, 42])

    configs_found = analyzer.find_configs({"collision": lambda x: x},
                                          scenarios_as_in_configs=[24, 11, 121, 1])
    self.assertEqual(configs_found, [11, 1])

    configs_found = analyzer.find_configs(scenario_idx_list=[4, 10, 7],
                                          in_configs=[1, 42])
    self.assertEqual(configs_found, [1, 42])
def test_dump_and_load_benchmark_configs(self):
    result_num = 100
    confs, result_data, _ = random_benchmark_conf_data(result_num, 2000000)
    br = BenchmarkResult(result_dict=result_data, benchmark_configs=confs)
    br.dump("./results_with_confs", dump_configs=True, max_mb_per_file=5)
    br_loaded = BenchmarkResult.load("./results_with_confs")
    br_loaded.load_benchmark_configs(config_idx_list=list(range(0, result_num)))
    loaded_confs = br_loaded.get_benchmark_configs()
    self.assertEqual(confs, loaded_confs)
    loaded_dict = br_loaded.get_result_dict()
    self.assertEqual(br.get_result_dict(), loaded_dict)
def run(self, viewer=None, maintain_history=False, checkpoint_every=None):
    results_tmp = ray.get([
        actor.run.remote(viewer, maintain_history, checkpoint_every)
        for actor in self.actors
    ])
    result_dict = []
    benchmark_configs = []
    histories = {}
    for result_tmp in results_tmp:
        result_dict.extend(result_tmp.get_result_dict())
        benchmark_configs.extend(result_tmp.get_benchmark_configs())
        histories.update(result_tmp.get_histories())
    benchmark_result = BenchmarkResult(result_dict, benchmark_configs,
                                       histories=histories)
    self.existing_benchmark_result.extend(benchmark_result)
    return self.existing_benchmark_result
def merge_checkpoint_benchmark_results(checkpoint_dir): checkpoint_files = glob.glob(os.path.join(checkpoint_dir, "**/*.ckpnt"), recursive=True) merged_result_filename = BenchmarkRunner.get_merged_result_filename(checkpoint_dir) if os.path.exists(merged_result_filename): merged_result = BenchmarkResult.load_results(filename=merged_result_filename) else: merged_result = BenchmarkResult(file_name=merged_result_filename) # merge all checkpoints with new results for checkpoint_file in checkpoint_files: loaded_result = BenchmarkResult.load(os.path.abspath(checkpoint_file)) merged_result.extend(loaded_result, file_level=True) logging.info("Extending with checkpoint {}".format(checkpoint_file)) # delete checkpoints for checkpoint_file in checkpoint_files: if "merged_result" in checkpoint_file: continue os.remove(checkpoint_file) logging.info("Removed old checkpoint file {}".format(checkpoint_file)) return merged_result
def test_database_run_and_analyze(self):
    dbs = DatabaseSerializer(test_scenarios=2, test_world_steps=3,
                             num_serialize_scenarios=2)
    # to find database files
    cwd = os.getcwd()
    if not debug:
        os.chdir("../benchmark_database/")
    else:
        os.chdir("bazel-bin/bark/benchmark/tests/py_benchmark_process_tests.runfiles/benchmark_database")
    dbs.process("data/database1")
    local_release_filename = dbs.release(version="test")
    db = BenchmarkDatabase(database_root=local_release_filename)
    evaluators = {"success": "EvaluatorGoalReached",
                  "collision": "EvaluatorCollisionEgoAgent",
                  "max_steps": "EvaluatorStepCount"}
    terminal_when = {"collision": lambda x: x, "max_steps": lambda x: x > 2}
    params = ParameterServer()  # only for evaluated agents, not passed to scenario!
    behaviors_tested = {"IDM": BehaviorIDMClassic(params),
                        "Const": BehaviorConstantAcceleration(params)}
    benchmark_runner = BenchmarkRunnerMP(benchmark_database=db,
                                         evaluators=evaluators,
                                         terminal_when=terminal_when,
                                         behaviors=behaviors_tested,
                                         log_eval_avg_every=2)
    result = benchmark_runner.run(maintain_history=True)
    result.dump(os.path.join("./benchmark_results"), dump_configs=True,
                dump_histories=True, max_mb_per_file=1)
    result_loaded = BenchmarkResult.load(os.path.join("./benchmark_results"))
    result_loaded.load_histories()
    result_loaded.load_benchmark_configs()

    params2 = ParameterServer()
    fig = plt.figure(figsize=[10, 10])
    viewer = MPViewer(params=params2,
                      center=[5112, 5165],
                      y_length=120,
                      enforce_y_length=True,
                      axis=fig.gca())
    analyzer = BenchmarkAnalyzer(benchmark_result=result_loaded)
    configs = analyzer.find_configs(criteria={"behavior": lambda x: x == "IDM",
                                              "success": lambda x: not x})
    configs_const = analyzer.find_configs(criteria={"behavior": lambda x: x == "Const",
                                                    "success": lambda x: not x})
    os.chdir(cwd)
    # analyzer.visualize(configs_idx_list=configs,
    #                    viewer=viewer, real_time_factor=10, fontsize=12)
    plt.close(fig)

    fig, (ax1, ax2) = plt.subplots(1, 2)
    viewer1 = MPViewer(params=params2,
                       center=[5112, 5165],
                       y_length=120,
                       enforce_y_length=True,
                       axis=ax1)
    viewer2 = MPViewer(params=params2,
                       center=[5112, 5165],
                       y_length=120,
                       enforce_y_length=True,
                       axis=ax2)
    analyzer.visualize(configs_idx_list=[configs[1:3], configs_const[1:3]],
                       viewer=[viewer1, viewer2],
                       viewer_names=["IDM", "ConstVelocity"],
                       real_time_factor=10,
                       fontsize=12)
class BenchmarkRunner:
    def __init__(self,
                 benchmark_database=None,
                 evaluators=None,
                 terminal_when=None,
                 behaviors=None,
                 behavior_configs=None,
                 num_scenarios=None,
                 benchmark_configs=None,
                 logger_name=None,
                 log_eval_avg_every=None,
                 checkpoint_dir=None,
                 merge_existing=False,
                 deepcopy=True):
        self.benchmark_database = benchmark_database
        self.evaluators = evaluators or {}
        self.terminal_when = terminal_when or []
        if behaviors:
            self.behavior_configs = BehaviorConfig.configs_from_dict(behaviors)
        else:
            self.behavior_configs = behavior_configs or {}
        self.benchmark_configs = benchmark_configs or \
            self._create_configurations(num_scenarios)
        self.logger = logging.getLogger(logger_name or "BenchmarkRunner")
        self.logger.setLevel(logging.DEBUG)
        self.logger.info("Total number of configs to run: {}".format(
            len(self.benchmark_configs)))
        self.existing_benchmark_result = BenchmarkResult()
        self.configs_to_run = self.benchmark_configs
        self._deepcopy = deepcopy
        self.checkpoint_dir = checkpoint_dir or "checkpoints"
        if not os.path.exists(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)
        if merge_existing:
            self.existing_benchmark_result = \
                BenchmarkRunner.merge_checkpoint_benchmark_results(checkpoint_dir)
            self.logger.info("Merged {} processed configs in folder {}".format(
                len(self.existing_benchmark_result.get_benchmark_configs()),
                checkpoint_dir))
            self.configs_to_run = self.get_configs_to_run(
                self.benchmark_configs, self.existing_benchmark_result)
            self.logger.info("Remaining number of configs to run: {}".format(
                len(self.configs_to_run)))
        self.exceptions_caught = []
        self.log_eval_avg_every = log_eval_avg_every

    def get_checkpoint_file_name(self):
        return "benchmark_runner.ckpnt"

    def clear_checkpoint_dir(self):
        files = glob.glob(os.path.join(self.checkpoint_dir, "*.ckpnt"))
        for f in files:
            os.remove(f)

    @staticmethod
    def merge_checkpoint_benchmark_results(checkpoint_dir):
        checkpoint_files = glob.glob(os.path.join(checkpoint_dir, "**/*.ckpnt"),
                                     recursive=True)
        merged_result = BenchmarkResult()
        # merge all checkpoints with new results
        for checkpoint_file in checkpoint_files:
            logging.info("Loading checkpoint {}".format(
                os.path.abspath(checkpoint_file)))
            next_result = BenchmarkResult.load(os.path.abspath(checkpoint_file),
                                               load_configs=True,
                                               load_histories=True)
            merged_result.extend(next_result)
        # dump merged result (filename defined up front so the cleanup below also
        # works when nothing was merged)
        merged_result_filename = os.path.join(checkpoint_dir, "merged_results.ckpnt")
        if len(merged_result.get_result_dict()) > 0:
            logging.info("Dumping merged result")
            merged_result.dump(merged_result_filename,
                               dump_configs=True, dump_histories=True)
        # delete old checkpoints
        for checkpoint_file in checkpoint_files:
            if checkpoint_file == merged_result_filename:
                continue
            os.remove(checkpoint_file)
            logging.info("Removed old checkpoint file {}".format(checkpoint_file))
        return merged_result

    @staticmethod
    def get_configs_to_run(benchmark_configs, existing_benchmark_result):
        existing_inds = existing_benchmark_result.get_benchmark_config_indices()
        required_inds = BenchmarkResult(
            benchmark_configs=benchmark_configs).get_benchmark_config_indices()
        missing_inds = list(set(required_inds) - set(existing_inds))
        filtered_configs = filter(lambda bc: bc.config_idx in missing_inds,
                                  benchmark_configs)
        return list(filtered_configs)

    def _create_configurations(self, num_scenarios=None):
        benchmark_configs = []
        for behavior_config in self.behavior_configs:
            # run over all scenario generators from the benchmark database
            for scenario_generator, scenario_set_name, scenario_set_param_desc in self.benchmark_database:
                for scenario, scenario_idx in scenario_generator:
                    if num_scenarios and scenario_idx >= num_scenarios:
                        break
                    benchmark_config = BenchmarkConfig(
                        len(benchmark_configs), behavior_config, scenario,
                        scenario_idx, scenario_set_name, scenario_set_param_desc)
                    benchmark_configs.append(benchmark_config)
        return benchmark_configs

    def run(self, viewer=None, maintain_history=False, checkpoint_every=None):
        results = []
        histories = {}
        for idx, bmark_conf in enumerate(self.configs_to_run):
            self.logger.info(
                "Running config idx {} ({}/{}): Scenario {} of set \"{}\" for behavior \"{}\"".format(
                    bmark_conf.config_idx, idx, len(self.benchmark_configs) - 1,
                    bmark_conf.scenario_idx, bmark_conf.scenario_set_name,
                    bmark_conf.behavior_config.behavior_name))
            bmark_conf = copy.deepcopy(bmark_conf) if self._deepcopy else bmark_conf
            result_dict, scenario_history = self._run_benchmark_config(
                bmark_conf, viewer, maintain_history)
            results.append(result_dict)
            histories[bmark_conf.config_idx] = scenario_history
            if self.log_eval_avg_every and (idx + 1) % self.log_eval_avg_every == 0:
                self._log_eval_average(results, self.configs_to_run)
            if checkpoint_every and (idx + 1) % checkpoint_every == 0:
                intermediate_result = BenchmarkResult(
                    results, self.configs_to_run[0:idx + 1], histories=histories)
                checkpoint_file = os.path.join(self.checkpoint_dir,
                                               self.get_checkpoint_file_name())
                intermediate_result.dump(checkpoint_file, dump_configs=True,
                                         dump_histories=maintain_history)
                self.logger.info("Saved checkpoint {}".format(checkpoint_file))
        benchmark_result = BenchmarkResult(results, self.configs_to_run,
                                           histories=histories)
        self.existing_benchmark_result.extend(benchmark_result)
        return self.existing_benchmark_result

    def run_benchmark_config(self, config_idx, **kwargs):
        for bmark_conf in self.benchmark_configs:
            if bmark_conf.config_idx == config_idx:
                bmark_conf = copy.deepcopy(bmark_conf) if self._deepcopy else bmark_conf
                result_dict, scenario_history = self._run_benchmark_config(
                    bmark_conf, **kwargs)
                return BenchmarkResult(result_dict, [bmark_conf],
                                       histories={config_idx: scenario_history})
        self.logger.error(
            "Config idx {} not found in benchmark configs. Skipping...".format(config_idx))
        return

    def _run_benchmark_config(self, benchmark_config, viewer=None,
                              maintain_history=False):
        scenario = benchmark_config.scenario
        behavior = benchmark_config.behavior_config.behavior
        parameter_server = ParameterServer(json=scenario._json_params)
        scenario_history = []
        step = 0
        try:
            world = scenario.GetWorldState()
        except Exception as e:
            self.logger.error(
                "For config-idx {}, Exception thrown in scenario.GetWorldState: {}".format(
                    benchmark_config.config_idx, e))
            self._append_exception(benchmark_config, e)
            # also return the (empty) history so callers can unpack (result_dict, history)
            return {**benchmark_config.as_dict(),
                    "step": step,
                    "Terminal": "exception_raised"}, scenario_history
        # if behavior is not None (None specifies that also the default model can be evaluated)
        if behavior:
            world.agents[scenario._eval_agent_ids[0]].behavior_model = behavior
        if maintain_history:
            self._append_to_scenario_history(scenario_history, world, scenario)
        self._reset_evaluators(world, scenario._eval_agent_ids)
        step_time = parameter_server["Simulation"]["StepTime", "", 0.2]
        if not isinstance(step_time, float):
            step_time = 0.2
        terminal = False
        terminal_why = None
        while not terminal:
            try:
                evaluation_dict = self._get_evalution_dict(world)
            except Exception as e:
                self.logger.error(
                    "For config-idx {}, Exception thrown in evaluation: {}".format(
                        benchmark_config.config_idx, e))
                terminal_why = "exception_raised"
                self._append_exception(benchmark_config, e)
                evaluation_dict = {}
                break
            terminal, terminal_why = self._is_terminal(evaluation_dict)
            if not terminal:
                if viewer:
                    viewer.drawWorld(world, scenario._eval_agent_ids,
                                     scenario_idx=benchmark_config.scenario_idx)
                    viewer.show(block=False)
                    time.sleep(step_time)
                    viewer.clear()
                try:
                    world.Step(step_time)
                except Exception as e:
                    self.logger.error(
                        "For config-idx {}, Exception thrown in world.Step: {}".format(
                            benchmark_config.config_idx, e))
                    terminal_why = "exception_raised"
                    self._append_exception(benchmark_config, e)
                    break
                if maintain_history:
                    self._append_to_scenario_history(scenario_history, world, scenario)
                step += 1
        dct = {**benchmark_config.as_dict(),
               "step": step,
               **evaluation_dict,
               "Terminal": terminal_why}
        return dct, scenario_history

    def _append_to_scenario_history(self, scenario_history, world, scenario):
        scenario = Scenario(agent_list=list(world.agents.values()),
                            map_file_name=scenario.map_file_name,
                            eval_agent_ids=scenario.eval_agent_ids,
                            json_params=scenario.json_params)
        scenario_history.append(scenario.copy())

    def _append_exception(self, benchmark_config, exception):
        self.exceptions_caught.append((benchmark_config.config_idx, exception))

    def _reset_evaluators(self, world, eval_agent_ids):
        for evaluator_name, evaluator_params in self.evaluators.items():
            evaluator_bark = None
            if isinstance(evaluator_params, str):
                try:
                    evaluator_bark = eval("{}(eval_agent_ids[0])".format(evaluator_params))
                except:
                    evaluator_bark = eval("{}()".format(evaluator_params))
            elif isinstance(evaluator_params, dict):
                evaluator_bark = eval(
                    "{}(agent_id=eval_agent_ids[0], **evaluator_params['params'])".format(
                        evaluator_params["type"]))
            else:
                raise ValueError
            world.AddEvaluator(evaluator_name, evaluator_bark)

    def _evaluation_criteria(self):
        bark_evals = [eval_crit for eval_crit, _ in self.evaluators.items()]
        bark_evals.append("step")
        return bark_evals

    def _get_evalution_dict(self, world):
        return world.Evaluate()

    def _is_terminal(self, evaluation_dict):
        terminal = False
        terminal_why = []
        for evaluator_name, function in self.terminal_when.items():
            if function(evaluation_dict[evaluator_name]):
                terminal = True
                terminal_why.append(evaluator_name)
        return terminal, terminal_why

    def _log_eval_average(self, result_dct_list, configs):
        bresult = BenchmarkResult(result_dct_list, configs)
        df = bresult.get_data_frame()
        for eval_group in bresult.get_evaluation_groups():
            if eval_group not in df.columns:
                df[eval_group] = np.nan
        df.fillna(-1, inplace=True)
        grouped = df.apply(pd.to_numeric, errors='ignore').groupby(
            bresult.get_evaluation_groups()).mean()[self._evaluation_criteria()]
        self.logger.info(
            "\n------------------- Current Evaluation Results ----------------------\n"
            " Num. Results: {}\n{}\n"
            "----------------------------------------------------------------------".format(
                len(result_dct_list), grouped.to_string()))
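# Hedged end-to-end sketch (assumption, mirroring test_database_run_and_analyze
# above): running the BenchmarkRunner with periodic checkpointing and dumping the
# result. The database argument "db", the evaluator names, the terminal condition
# and the output path follow the test above and are illustrative, not guaranteed
# defaults.
def example_run_benchmark(db):
    evaluators = {"success": "EvaluatorGoalReached",
                  "collision": "EvaluatorCollisionEgoAgent",
                  "max_steps": "EvaluatorStepCount"}
    terminal_when = {"collision": lambda x: x, "max_steps": lambda x: x > 50}
    params = ParameterServer()
    behaviors = {"IDM": BehaviorIDMClassic(params)}
    runner = BenchmarkRunner(benchmark_database=db,
                             evaluators=evaluators,
                             terminal_when=terminal_when,
                             behaviors=behaviors,
                             log_eval_avg_every=10,
                             checkpoint_dir="checkpoints")
    result = runner.run(maintain_history=True, checkpoint_every=10)
    result.dump("./benchmark_results", dump_configs=True, dump_histories=True)
    return result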
def run(self, viewer=None, maintain_history=False, checkpoint_every=None):
    last_results = []
    last_histories = {}
    last_run_configs = []
    results = []
    checkpoint_file = os.path.abspath(
        os.path.join(self.checkpoint_dir, self.get_checkpoint_file_name()))
    last_result_file = os.path.abspath(
        os.path.join(self.checkpoint_dir, "tmp_{}".format(self.get_checkpoint_file_name())))
    checkpoint_result = BenchmarkResult(file_name=checkpoint_file)
    for idx, bmark_conf in enumerate(self.configs_to_run):
        self.logger.info(
            "Running config idx {} ({}/{}): Scenario {} of set \"{}\" for behavior \"{}\"".format(
                bmark_conf.config_idx, idx, len(self.benchmark_configs) - 1,
                bmark_conf.scenario_idx, bmark_conf.scenario_set_name,
                bmark_conf.behavior_config.behavior_name))
        bmark_conf = copy.deepcopy(bmark_conf) if self._deepcopy else bmark_conf
        result_dict, scenario_history = self._run_benchmark_config(
            bmark_conf, viewer, maintain_history)
        results.append(result_dict)
        last_results.append(result_dict)
        last_histories[bmark_conf.config_idx] = scenario_history
        last_run_configs.append(bmark_conf)
        if self.log_eval_avg_every and (idx + 1) % self.log_eval_avg_every == 0:
            self._log_eval_average(results, self.configs_to_run)
        if checkpoint_every and (idx + 1) % checkpoint_every == 0:
            # append results collected since the last checkpoint
            last_benchmark_result = BenchmarkResult(
                result_dict=last_results, file_name=last_result_file,
                benchmark_configs=last_run_configs, histories=last_histories)
            last_benchmark_result.dump(last_result_file, dump_configs=True,
                                       dump_histories=maintain_history, append=False)
            checkpoint_result.extend(benchmark_result=last_benchmark_result,
                                     file_level=True)
            self.logger.info("Extended checkpoint {} with last result.".format(checkpoint_file))
            last_histories.clear()
            last_run_configs.clear()
            last_results.clear()
    # append results of the last (possibly partial) run
    last_benchmark_result = BenchmarkResult(
        result_dict=last_results, file_name=last_result_file,
        benchmark_configs=last_run_configs, histories=last_histories)
    last_benchmark_result.dump(last_result_file, dump_configs=True,
                               dump_histories=maintain_history, append=False)
    checkpoint_result.extend(benchmark_result=last_benchmark_result, file_level=True)
    os.remove(last_result_file)
    self.logger.info("Extended checkpoint {} with final result.".format(checkpoint_file))
    checkpoint_result.extend(benchmark_result=self.existing_benchmark_result,
                             file_level=True)
    return checkpoint_result
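# Hedged usage sketch (assumption): resuming after a crash by constructing the
# runner with merge_existing=True, so previously checkpointed configs are merged
# and skipped before the run continues. All argument values are illustrative
# placeholders.
def example_resume_run(db, evaluators, terminal_when, behaviors):
    runner = BenchmarkRunner(benchmark_database=db,
                             evaluators=evaluators,
                             terminal_when=terminal_when,
                             behaviors=behaviors,
                             checkpoint_dir="checkpoints",
                             merge_existing=True)
    # only configs not yet contained in the merged checkpoints are executed
    return runner.run(checkpoint_every=10)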
def test_extend_from_file(self):
    try:
        os.remove("./br1")
        os.remove("./br2")
        os.remove("./br3")
    except:
        pass
    result_num = 100
    confs, result_data, histories1 = random_benchmark_conf_data(
        result_num, 2000000, hist_size=1500000, offset=0)
    br1 = BenchmarkResult(result_dict=result_data, benchmark_configs=confs,
                          histories=histories1)
    br1.dump("./br1", dump_histories=True, dump_configs=True)
    br1_df = br1.get_data_frame().copy()

    result_num = 30
    confs2, result_data2, histories2 = random_benchmark_conf_data(
        result_num, 2000000, hist_size=1500000, offset=200)
    br2 = BenchmarkResult(result_dict=result_data2, benchmark_configs=confs2,
                          histories=histories2)
    br2.dump(filename="./br2", dump_histories=True, dump_configs=True)

    result_num = 10
    confs3, result_data3, histories3 = random_benchmark_conf_data(
        result_num, 2000000, hist_size=1500000, offset=400)
    br3 = BenchmarkResult(result_dict=result_data3, benchmark_configs=confs3,
                          histories=histories3)
    br3.dump(filename="./br3", dump_histories=True, dump_configs=True)

    br1.extend(benchmark_result=br2, file_level=True)
    br1.extend(benchmark_result=br3, file_level=True)
    br_loaded = BenchmarkResult.load("./br1", load_histories=True, load_configs=True)

    df_desired = br1_df
    df_desired = pd.concat([df_desired, br2.get_data_frame()])
    df_desired = pd.concat([df_desired, br3.get_data_frame()])
    self.assertEqual(len(br_loaded.get_data_frame().index), len(df_desired.index))

    extended_confs = br_loaded.get_benchmark_configs()
    self.assertEqual(len(extended_confs), 140)
    extended_histories = br_loaded.get_histories()
    self.assertEqual(len(extended_histories), 140)

    extended_histories = histories1
    extended_histories.update(histories2)
    extended_histories.update(histories3)
    for bc in extended_confs:
        self.assertEqual(br_loaded.get_history(bc.config_idx),
                         extended_histories[bc.config_idx])