def test_bench_smoke(self):
    problem1 = TestProblem()
    problem2 = LSETestProblem()

    bench = Benchmark(
        problems=[problem1, problem2],
        configs=self.bench_config,
        n_reps=2,
        log_every=2,
    )
    bench.run_benchmarks()

    out = bench.pandas()

    # assert problem metadata was correctly saved
    self.assertEqual(
        sorted(out["problem_name"].unique()),
        ["test lse problem", "test problem"],
    )
    self.assertEqual(
        sorted(
            out[out["problem_name"] == "test lse problem"]["problem_threshold"].unique()
        ),
        ["0.75"],
    )

    # assert derived values work correctly
    self.assertEqual(
        sorted(
            out[out["problem_name"] == "test problem"]["opt_strat_n_trials"].unique()
        ),
        ["2", "3"],
    )
    self.assertEqual(
        sorted(
            out[out["problem_name"] == "test lse problem"]["opt_strat_n_trials"].unique()
        ),
        ["1", "2"],
    )

    # have as many final results as we expect
    self.assertTrue(len(out[out.final]) == bench.num_benchmarks)

    # have as many repetitions as we expect
    self.assertTrue(len(out.rep.unique()) == bench.n_reps)

    # reporting intervals are correct
    self.assertTrue((out[~out.final].trial_id % 2 == 0).all())

    # we don't run extra trials
    total_trials = out.init_strat_n_trials.astype(int) + out.opt_strat_n_trials.astype(int)
    self.assertTrue((out.trial_id <= total_trials).all())

    # ensure each simulation has a unique random seed
    self.assertTrue(out[out["final"]]["seed"].is_unique)
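# Illustrative sketch only: the real `self.bench_config` fixture is defined elsewhere in the
# test suite, so the keys and values below are assumptions meant to show the general shape of
# such a config. Lists at the leaves (e.g. under "n_trials") are expanded into a cartesian
# product of configurations by Benchmark, which is why a derived column like
# `opt_strat_n_trials` can take multiple values per problem in the assertions above.
example_bench_config = {
    "common": {"lb": "[0]", "ub": "[1]"},
    "init_strat": {"n_trials": 2},
    "opt_strat": {"n_trials": [1, 2]},  # two leaf values -> two benchmark variants
}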
def run_benchmarks_with_checkpoints(
    out_path: str,
    benchmark_name: str,
    problems: List[Problem],
    configs: Mapping[str, Union[str, list]],
    global_seed: Optional[int] = None,
    n_chunks: int = 1,
    n_reps_per_chunk: int = 1,
    log_every: Optional[int] = None,
    checkpoint_every: int = 60,
    n_proc: int = 1,
    serial_debug: bool = False,
) -> None:
    """Runs a series of benchmarks, saving both final and intermediate results to .csv files.

    Benchmarks are run in sequential chunks, each of which runs all combinations of
    problems/configs/reps in parallel. This function should always be used with the
    "if __name__ == '__main__': ..." idiom.

    Args:
        out_path (str): The path to save the results to.
        benchmark_name (str): A name given to this set of benchmarks. Results will be saved
            in files named like "out_path/benchmark_name_chunk{chunk_number}_out.csv".
        problems (List[Problem]): Problem objects containing the test function to evaluate.
        configs (Mapping[str, Union[str, list]]): Dictionary of configs to run. Lists at
            leaves are used to construct a cartesian product of configurations.
        global_seed (int, optional): Global seed to use for reproducible benchmarks.
            Defaults to randomized seeds.
        n_chunks (int): The number of chunks to break the results into. Each chunk will
            contain at least 1 run of every combination of problem and config.
        n_reps_per_chunk (int, optional): Number of repetitions to run each problem/config
            in each chunk.
        log_every (int, optional): Logging interval during an experiment. Defaults to only
            logging at the end.
        checkpoint_every (int): Save intermediate results every checkpoint_every seconds.
        n_proc (int): Number of processors to use.
        serial_debug (bool): Whether to run everything serially for debugging instead of
            in parallel chunks.
    """
    Path(out_path).mkdir(parents=True, exist_ok=True)  # make the output folder if it does not exist

    if serial_debug:
        out_fname = Path(f"{out_path}/{benchmark_name}_out.csv")
        print(f"Starting {benchmark_name} benchmark (serial debug mode)...")
        bench = Benchmark(
            problems=problems,
            configs=configs,
            seed=global_seed,
            n_reps=n_reps_per_chunk * n_chunks,
            log_every=log_every,
        )
        bench.run_benchmarks()
        final_results = bench.pandas()
        final_results.to_csv(out_fname)
    else:
        for chunk in range(n_chunks):
            out_fname = Path(f"{out_path}/{benchmark_name}_chunk{chunk}_out.csv")
            intermediate_fname = Path(f"{out_path}/{benchmark_name}_chunk{chunk}_checkpoint.csv")
            print(f"Starting {benchmark_name} benchmark... chunk {chunk}")

            bench = PathosBenchmark(
                nproc=n_proc,
                problems=problems,
                configs=configs,
                seed=None,
                n_reps=n_reps_per_chunk,
                log_every=log_every,
            )

            if global_seed is None:
                global_seed = int(np.random.randint(0, 200))
            bench.seed = (
                global_seed + chunk * bench.num_benchmarks
            )  # HACK. TODO: make num_benchmarks a property of bench configs
            bench.start_benchmarks()

            while not bench.is_done:
                time.sleep(checkpoint_every)
                collate_start = time.time()
                print(
                    f"Checkpointing {benchmark_name} chunk {chunk}..., "
                    f"{len(bench.futures)}/{bench.num_benchmarks} alive"
                )
                bench.collate_benchmarks(wait=False)
                temp_results = bench.pandas()
                if len(temp_results) > 0:
                    temp_results["rep"] = temp_results["rep"] + n_reps_per_chunk * chunk
                    temp_results.to_csv(intermediate_fname)
                print(
                    f"Collate done in {time.time() - collate_start} seconds, "
                    f"{len(bench.futures)}/{bench.num_benchmarks} left"
                )

            print(f"{benchmark_name} chunk {chunk} fully done!")
            final_results = bench.pandas()
            final_results["rep"] = final_results["rep"] + n_reps_per_chunk * chunk
            final_results.to_csv(out_fname)
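# Usage sketch, following the "__main__" idiom the docstring requires. The import path,
# problem class, config contents, and paths below are placeholders (assumptions), not
# values taken from this module.
#
# from aepsych.benchmark import run_benchmarks_with_checkpoints  # import path is an assumption
#
# if __name__ == "__main__":
#     example_problems = [TestProblem()]  # hypothetical Problem instances
#     example_configs = {
#         "common": {"lb": "[0]", "ub": "[1]"},
#         "opt_strat": {"n_trials": [10, 20]},  # list leaf -> cartesian product of configs
#     }
#     run_benchmarks_with_checkpoints(
#         out_path="bench_out",  # results land in bench_out/smoke_chunk{i}_out.csv
#         benchmark_name="smoke",
#         problems=example_problems,
#         configs=example_configs,
#         global_seed=12345,
#         n_chunks=2,
#         n_reps_per_chunk=5,
#         log_every=10,
#         checkpoint_every=120,
#         n_proc=4,
#     )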