def test_bench_smoke(self):
    logger = BenchmarkLogger(log_every=2)
    problem = TestProblem(lb=[0], ub=[1])
    bench = Benchmark(
        problem=problem, logger=logger, configs=self.bench_config, n_reps=2
    )
    bench.run_benchmarks()

    out = bench.logger.pandas()

    # have as many final results as we expect
    self.assertTrue(len(out[out.final]) == bench.num_benchmarks)

    # have as many repetitions as we expect
    self.assertTrue(len(out.rep.unique()) == bench.n_reps)

    # reporting intervals are correct
    self.assertTrue((out[~out.final].trial_id % 2 == 0).all())

    # we don't run extra trials
    total_trials = out.SobolStrategy_n_trials.astype(
        int
    ) + out.ModelWrapperStrategy_n_trials.astype(int)
    self.assertTrue((out.trial_id <= total_trials).all())

def test_add_bench(self):
    logger = BenchmarkLogger(log_every=2)
    problem = TestProblem(lb=[0], ub=[1])

    bench_1 = Benchmark(
        problem=problem,
        logger=logger,
        configs=self.bench_config,
        global_seed=2,
        n_reps=2,
    )
    bench_2 = Benchmark(
        problem=problem,
        logger=logger,
        configs=self.bench_config,
        global_seed=2,
        n_reps=2,
    )

    bench_combined = bench_1 + bench_2
    three_bench = combine_benchmarks(bench_1, bench_2, bench_1)

    self.assertTrue(len(bench_combined.combinations) == 12)
    self.assertTrue(len(three_bench.combinations) == 18)
    self.assertTrue(len(bench_1.combinations) == 6)
    self.assertTrue(len(bench_2.combinations) == 6)

def test_nonmonotonic_single_lse_eval(self):
    config = {
        "common": {
            "outcome_type": "single_probit",
            "strategy_names": "[init_strat, opt_strat]",
            "acqf": "MCLevelSetEstimation",
            "model": "GPClassificationModel",
        },
        "init_strat": {"generator": "SobolGenerator", "n_trials": 50},
        "opt_strat": {"generator": "OptimizeAcqfGenerator", "n_trials": 1},
        "MCLevelSetEstimation": {
            "target": 0.75,
            "beta": 3.98,
        },
        "GPClassificationModel": {
            "inducing_size": 10,
            "mean_covar_factory": "default_mean_covar_factory",
        },
        "OptimizeAcqfGenerator": {
            "restarts": 10,
            "samps": 1000,
        },
    }
    problem = LSETestProblem()
    bench = Benchmark(problems=[problem], configs=config, log_every=100)
    _, strat = bench.run_experiment(problem, bench.combinations[0], 0, 0)
    e = problem.evaluate(strat)
    self.assertTrue(e["mean_square_err_p"] < 0.05)

def test_bench_smoke(self):
    problem1 = TestProblem()
    problem2 = LSETestProblem()

    bench = Benchmark(
        problems=[problem1, problem2],
        configs=self.bench_config,
        n_reps=2,
        log_every=2,
    )
    bench.run_benchmarks()

    out = bench.pandas()

    # assert problem metadata was correctly saved
    self.assertEqual(
        sorted(out["problem_name"].unique()), ["test lse problem", "test problem"]
    )
    self.assertEqual(
        sorted(
            out[out["problem_name"] == "test lse problem"][
                "problem_threshold"
            ].unique()
        ),
        ["0.75"],
    )

    # assert derived values work correctly
    self.assertEqual(
        sorted(
            out[out["problem_name"] == "test problem"]["opt_strat_n_trials"].unique()
        ),
        ["2", "3"],
    )
    self.assertEqual(
        sorted(
            out[out["problem_name"] == "test lse problem"][
                "opt_strat_n_trials"
            ].unique()
        ),
        ["1", "2"],
    )

    # have as many final results as we expect
    self.assertTrue(len(out[out.final]) == bench.num_benchmarks)

    # have as many repetitions as we expect
    self.assertTrue(len(out.rep.unique()) == bench.n_reps)

    # reporting intervals are correct
    self.assertTrue((out[~out.final].trial_id % 2 == 0).all())

    # we don't run extra trials
    total_trials = out.init_strat_n_trials.astype(int) + out.opt_strat_n_trials.astype(
        int
    )
    self.assertTrue((out.trial_id <= total_trials).all())

    # ensure each simulation has a unique random seed
    self.assertTrue(out[out["final"]]["seed"].is_unique)

def test_monotonic_single_lse_eval(self): config = { "common": { "outcome_type": "single_probit", "strategy_names": "[init_strat, opt_strat]", "acqf": "MonotonicMCLSE", "model": "MonotonicRejectionGP", }, "init_strat": { "generator": "SobolGenerator", "n_trials": 50 }, "opt_strat": { "generator": "MonotonicRejectionGenerator", "n_trials": 1 }, "SobolGenerator": { "seed": 1 }, "MonotonicMCLSE": { "target": 0.75, "beta": 3.98, }, "MonotonicRejectionGP": { "inducing_size": 10, "mean_covar_factory": "monotonic_mean_covar_factory", "monotonic_idxs": "[1]", }, "MonotonicRejectionGenerator": { "model_gen_options": { "num_restarts": 10, "raw_samples": 1000, } }, } problem = LSETestProblem() bench = Benchmark(problems=[problem], configs=config, log_every=100) _, strat = bench.run_experiment(problem, bench.combinations[0], 0, 0) e = problem.evaluate(strat) self.assertTrue(e["mean_square_err_p"] < 0.05)
def test_monotonic_single_lse_eval(self): config = { "common": { "lb": "[-1, -1]", "ub": "[1, 1]", "outcome_type": "single_probit", }, "experiment": { "acqf": "MonotonicMCLSE", "modelbridge_cls": "MonotonicSingleProbitModelbridge", "init_strat_cls": "SobolStrategy", "opt_strat_cls": "ModelWrapperStrategy", "model": "MonotonicRejectionGP", }, "MonotonicMCLSE": { "target": 0.75, "beta": 3.98, }, "MonotonicRejectionGP": { "inducing_size": 10, "mean_covar_factory": "monotonic_mean_covar_factory", }, "MonotonicSingleProbitModelbridge": { "restarts": 10, "samps": 1000, }, "SobolStrategy": { "n_trials": 50, }, "ModelWrapperStrategy": { "n_trials": 1, }, } problem = TestProblem(lb=[-1, -1], ub=[1, 1]) logger = BenchmarkLogger(log_every=100) bench = Benchmark(problem=problem, configs=config, logger=logger) strat = bench.run_experiment(bench.combinations[0], logger, 0, 0) e = problem.evaluate(strat) self.assertTrue(e["mean_square_err_p"] < 0.05)
def run_benchmarks_with_checkpoints(
    out_path: str,
    benchmark_name: str,
    problems: List[Problem],
    configs: Mapping[str, Union[str, list]],
    global_seed: Optional[int] = None,
    n_chunks: int = 1,
    n_reps_per_chunk: int = 1,
    log_every: Optional[int] = None,
    checkpoint_every: int = 60,
    n_proc: int = 1,
    serial_debug: bool = False,
) -> None:
    """Runs a series of benchmarks, saving both final and intermediate results to
    .csv files. Benchmarks are run in sequential chunks, each of which runs all
    combinations of problems/configs/reps in parallel.

    This function should always be called under an "if __name__ == '__main__': ..." guard.

    Args:
        out_path (str): The path to save the results to.
        benchmark_name (str): A name given to this set of benchmarks. Results will be
            saved in files named like "out_path/benchmark_name_chunk{chunk_number}_out.csv".
        problems (List[Problem]): Problem objects containing the test functions to evaluate.
        configs (Mapping[str, Union[str, list]]): Dictionary of configs to run. Lists at
            leaves are used to construct a cartesian product of configurations.
        global_seed (int, optional): Global seed to use for reproducible benchmarks.
            Defaults to randomized seeds.
        n_chunks (int): The number of chunks to break the results into. Each chunk will
            contain at least 1 run of every combination of problem and config.
        n_reps_per_chunk (int, optional): Number of repetitions to run each problem/config
            in each chunk.
        log_every (int, optional): Logging interval during an experiment. Defaults to
            only logging at the end.
        checkpoint_every (int): Save intermediate results every checkpoint_every seconds.
        n_proc (int): Number of processors to use.
        serial_debug (bool): If True, run benchmarks serially for easier debugging.
    """
    Path(out_path).mkdir(
        parents=True, exist_ok=True
    )  # make an output folder if it does not exist

    if serial_debug:
        out_fname = Path(f"{out_path}/{benchmark_name}_out.csv")
        print(f"Starting {benchmark_name} benchmark (serial debug mode)...")
        bench = Benchmark(
            problems=problems,
            configs=configs,
            seed=global_seed,
            n_reps=n_reps_per_chunk * n_chunks,
            log_every=log_every,
        )
        bench.run_benchmarks()
        final_results = bench.pandas()
        final_results.to_csv(out_fname)
    else:
        for chunk in range(n_chunks):
            out_fname = Path(f"{out_path}/{benchmark_name}_chunk{chunk}_out.csv")
            intermediate_fname = Path(
                f"{out_path}/{benchmark_name}_chunk{chunk}_checkpoint.csv"
            )
            print(f"Starting {benchmark_name} benchmark... chunk {chunk} ")

            bench = PathosBenchmark(
                nproc=n_proc,
                problems=problems,
                configs=configs,
                seed=None,
                n_reps=n_reps_per_chunk,
                log_every=log_every,
            )

            if global_seed is None:
                global_seed = int(np.random.randint(0, 200))
            bench.seed = (
                global_seed + chunk * bench.num_benchmarks
            )  # HACK. TODO: make num_benchmarks a property of bench configs
            bench.start_benchmarks()

            while not bench.is_done:
                time.sleep(checkpoint_every)
                collate_start = time.time()
                print(
                    f"Checkpointing {benchmark_name} chunk {chunk}..., "
                    f"{len(bench.futures)}/{bench.num_benchmarks} alive"
                )
                bench.collate_benchmarks(wait=False)
                temp_results = bench.pandas()
                if len(temp_results) > 0:
                    temp_results["rep"] = temp_results["rep"] + n_reps_per_chunk * chunk
                    temp_results.to_csv(intermediate_fname)
                print(
                    f"Collate done in {time.time()-collate_start} seconds, "
                    f"{len(bench.futures)}/{bench.num_benchmarks} left"
                )

            print(f"{benchmark_name} chunk {chunk} fully done!")
            final_results = bench.pandas()
            final_results["rep"] = final_results["rep"] + n_reps_per_chunk * chunk
            final_results.to_csv(out_fname)

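# Illustrative usage sketch (an assumption, not part of the library source): shows the
# "__main__" guard the docstring above requires, since PathosBenchmark spawns worker
# processes. The output path, benchmark name, and seed values below are hypothetical
# placeholders; the config dict is borrowed from the nonmonotonic LSE test earlier in
# this section, and LSETestProblem is assumed to be importable from that test module.
if __name__ == "__main__":
    example_config = {
        "common": {
            "outcome_type": "single_probit",
            "strategy_names": "[init_strat, opt_strat]",
            "acqf": "MCLevelSetEstimation",
            "model": "GPClassificationModel",
        },
        "init_strat": {"generator": "SobolGenerator", "n_trials": 50},
        # a list at a leaf expands into a cartesian product of configurations
        "opt_strat": {"generator": "OptimizeAcqfGenerator", "n_trials": [1, 5]},
        "MCLevelSetEstimation": {"target": 0.75, "beta": 3.98},
        "GPClassificationModel": {
            "inducing_size": 10,
            "mean_covar_factory": "default_mean_covar_factory",
        },
        "OptimizeAcqfGenerator": {"restarts": 10, "samps": 1000},
    }
    run_benchmarks_with_checkpoints(
        out_path="./bench_out",  # hypothetical output directory
        benchmark_name="example_bench",  # hypothetical name used in output filenames
        problems=[LSETestProblem()],  # any Problem subclass; test helper assumed importable
        configs=example_config,
        global_seed=12345,
        n_chunks=2,
        n_reps_per_chunk=5,
        checkpoint_every=120,
        n_proc=4,
    )
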
def plot_audiometric_lse_grids(
    sobol_trials, opt_trials, phenotype="Metabolic+Sensory", beta=2
):
    """
    Generates Fig. 8
    """
    logger = BenchmarkLogger(log_every=5)
    bench_rbf = {
        "common": {"pairwise": False, "target": 0.75},
        "experiment": {
            "acqf": "MonotonicMCLSE",
            "modelbridge_cls": "MonotonicSingleProbitModelbridge",
            "init_strat_cls": "SobolStrategy",
            "opt_strat_cls": "ModelWrapperStrategy",
            "model": "MonotonicRejectionGP",
            "parnames": "[context,intensity]",
        },
        "MonotonicMCLSE": {
            "target": 0.75,
            "beta": 3.98,
        },
        "MonotonicRejectionGP": {
            "inducing_size": 100,
            "mean_covar_factory": [
                "monotonic_mean_covar_factory",
            ],
            "monotonic_idxs": ["[1]", "[]"],
            "uniform_idxs": "[]",
        },
        "MonotonicSingleProbitModelbridge": {"restarts": 10, "samps": 1000},
        "SobolStrategy": {
            "n_trials": [sobol_trials],
        },
        "ModelWrapperStrategy": {
            "n_trials": [opt_trials],
            "refit_every": [refit_every],
        },
    }
    bench_song = {
        "common": {"pairwise": False, "target": 0.75},
        "experiment": {
            "acqf": "BernoulliMCMutualInformation",
            "modelbridge_cls": "SingleProbitModelbridgeWithSongHeuristic",
            "init_strat_cls": "SobolStrategy",
            "opt_strat_cls": "ModelWrapperStrategy",
            "model": "GPClassificationModel",
            "parnames": "[context,intensity]",
        },
        "GPClassificationModel": {
            "inducing_size": 100,
            "dim": 2,
            "mean_covar_factory": [
                "song_mean_covar_factory",
            ],
        },
        "SingleProbitModelbridgeWithSongHeuristic": {"restarts": 10, "samps": 1000},
        "SobolStrategy": {
            "n_trials": [sobol_trials],
        },
        "ModelWrapperStrategy": {
            "n_trials": [opt_trials],
            "refit_every": [refit_every],
        },
    }
    all_bench_configs = [bench_rbf, bench_song]

    testfun = make_songetal_testfun(phenotype=phenotype, beta=beta)

    class AudiometricProblem(LSEProblem, Problem):
        def f(self, x):
            return testfun(x)

    lb = [-3, -20]
    ub = [4, 120]
    benches = []
    problem = AudiometricProblem(lb, ub)
    for config in all_bench_configs:
        full_config = copy(config)
        full_config["common"]["lb"] = str(lb)
        full_config["common"]["ub"] = str(ub)
        benches.append(
            Benchmark(
                problem=problem,
                logger=logger,
                configs=full_config,
                global_seed=global_seed,
                n_reps=1,
            )
        )
    combo_bench = combine_benchmarks(*benches)
    strats = []
    for config in combo_bench.combinations:
        strat = combo_bench.run_experiment(config, logger, seed=global_seed, rep=0)
        strats.append(strat)

    titles = [
        "Monotonic RBF Model, LSE (ours)",
        "Nonmonotonic RBF Model, LSE (ours)",
        "Linear-Additive Model, BALD",
    ]
    fig, axes = plt.subplots(2, 2, figsize=(7.5, 6.5))
    plotting_axes = [axes[1, 0], axes[0, 1], axes[0, 0]]
    fig.delaxes(axes[1, 1])
    _ = [
        plot_strat(
            strat=strat_,
            title=title_,
            ax=ax_,
            true_testfun=testfun,
            xlabel="Frequency (kHz)",
            ylabel="Intensity (dB HL)",
            flipx=True,
            logx=True,
            show=False,
            include_legend=False,
            include_colorbar=False,
        )
        for ax_, strat_, title_ in zip(plotting_axes, strats, titles)
    ]
    fig.tight_layout()
    handles, labels = axes[1, 0].get_legend_handles_labels()
    fig.legend(handles, labels, loc="lower right", bbox_to_anchor=(0.8, 0.2))
    cbr = fig.colorbar(axes[1, 0].images[0], ax=plotting_axes)
    cbr.set_label("Probability of Detection")

    return fig

def plot_acquisition_examples(sobol_trials, opt_trials, target_level=0.75):
    ### Same model, different acqf figure ####
    configs = {
        "common": {
            "pairwise": False,
            "target": target_level,
            "lb": "[-3]",
            "ub": "[3]",
        },
        "experiment": {
            "acqf": [
                "MonotonicMCPosteriorVariance",
                "MonotonicBernoulliMCMutualInformation",
                "MonotonicMCLSE",
            ],
            "modelbridge_cls": "MonotonicSingleProbitModelbridge",
            "init_strat_cls": "SobolStrategy",
            "opt_strat_cls": "ModelWrapperStrategy",
            "model": "MonotonicRejectionGP",
            "parnames": "[intensity]",
        },
        "MonotonicMCLSE": {
            "target": target_level,
            "beta": 3.98,
        },
        "MonotonicRejectionGP": {
            "inducing_size": 100,
            "mean_covar_factory": "monotonic_mean_covar_factory",
            "monotonic_idxs": "[0]",
            "uniform_idxs": "[]",
        },
        "MonotonicSingleProbitModelbridge": {"restarts": 10, "samps": 1000},
        "SobolStrategy": {"n_trials": sobol_trials},
        "ModelWrapperStrategy": {
            "n_trials": opt_trials,
            "refit_every": refit_every,
        },
    }

    def true_testfun(x):
        return norm.cdf(3 * x)

    class SimpleLinearProblem(Problem):
        def f(self, x):
            return norm.ppf(true_testfun(x))

    lb = [-3]
    ub = [3]

    logger = BenchmarkLogger()
    problem = SimpleLinearProblem(lb, ub)
    bench = Benchmark(
        problem=problem,
        logger=logger,
        configs=configs,
        global_seed=global_seed,
        n_reps=1,
    )

    # sobol_trials
    # now run each for just init trials, taking care to reseed each time
    strats = []
    for c in bench.combinations:
        np.random.seed(global_seed)
        torch.manual_seed(global_seed)
        s = SequentialStrategy.from_config(Config(config_dict=c))
        for _ in range(sobol_trials):
            next_x = s.gen()
            s.add_data(next_x, [problem.sample_y(next_x)])
        strats.append(s)

    # get first gen from all 3
    first_gens = [s.gen() for s in strats]

    fig, ax = plt.subplots(2, 2)
    plot_strat(
        strat=strats[0],
        title=f"First active trial\n (after {sobol_trials} Sobol trials)",
        ax=ax[0, 0],
        true_testfun=true_testfun,
        target_level=target_level,
        show=False,
        include_legend=False,
    )
    samps = [
        norm.cdf(s.sample(torch.Tensor(g), num_samples=10000))
        for s, g in zip(strats, first_gens)
    ]
    predictions = [np.mean(s) for s in samps]
    names = ["First BALV sample", "First BALD sample", "First LSE sample"]
    markers = ["s", "*", "^"]
    for i in range(3):
        ax[0, 0].scatter(
            first_gens[i][0][0],
            predictions[i],
            label=names[i],
            marker=markers[i],
            color="black",
        )

    # now run them all for the full duration
    for s in strats:
        for _tr in range(opt_trials):
            next_x = s.gen()
            s.add_data(next_x, [problem.sample_y(next_x)])

    plotting_axes = [ax[0, 1], ax[1, 0], ax[1, 1]]
    titles = [
        f"Monotonic RBF Model,\n BALV, after {sobol_trials+opt_trials} total trials",
        f"Monotonic RBF Model,\n BALD, after {sobol_trials+opt_trials} total trials",
        f"Monotonic RBF Model,\n LSE (ours) after {sobol_trials+opt_trials} total trials",
    ]
    _ = [
        plot_strat(
            strat=s,
            title=t,
            ax=a,
            true_testfun=true_testfun,
            target_level=target_level,
            show=False,
            include_legend=False,
        )
        for a, s, t in zip(plotting_axes, strats, titles)
    ]
    fig.tight_layout()
    handles, labels = ax[0, 0].get_legend_handles_labels()
    lgd = fig.legend(handles, labels, loc="lower right", bbox_to_anchor=(1.5, 0.25))
    # return legend so savefig works correctly
    return fig, lgd

def plot_novel_lse_grids(sobol_trials, opt_trials, funtype="detection"):
    """
    Generates Fig. TBA
    """
    logger = BenchmarkLogger(log_every=opt_trials)  # we only care about final perf
    bench_rbf = {
        "common": {"pairwise": False, "target": 0.75},
        "experiment": {
            "acqf": "MonotonicMCLSE",
            "modelbridge_cls": "MonotonicSingleProbitModelbridge",
            "init_strat_cls": "SobolStrategy",
            "opt_strat_cls": "ModelWrapperStrategy",
            "model": "MonotonicRejectionGP",
            "parnames": "[context,intensity]",
        },
        "MonotonicMCLSE": {
            "target": 0.75,
            "beta": 3.98,
        },
        "MonotonicRejectionGP": {
            "inducing_size": 100,
            "mean_covar_factory": [
                "monotonic_mean_covar_factory",
            ],
            "monotonic_idxs": ["[1]", "[]"],
            "uniform_idxs": "[]",
        },
        "MonotonicSingleProbitModelbridge": {"restarts": 10, "samps": 1000},
        "SobolStrategy": {
            "n_trials": [sobol_trials],
        },
        "ModelWrapperStrategy": {
            "n_trials": [opt_trials],
            "refit_every": [refit_every],
        },
    }
    bench_song = {
        "common": {"pairwise": False, "target": 0.75},
        "experiment": {
            "acqf": "BernoulliMCMutualInformation",
            "modelbridge_cls": "SingleProbitModelbridgeWithSongHeuristic",
            "init_strat_cls": "SobolStrategy",
            "opt_strat_cls": "ModelWrapperStrategy",
            "model": "GPClassificationModel",
            "parnames": "[context,intensity]",
        },
        "GPClassificationModel": {
            "inducing_size": 100,
            "dim": 2,
            "mean_covar_factory": [
                "song_mean_covar_factory",
            ],
        },
        "SingleProbitModelbridgeWithSongHeuristic": {"restarts": 10, "samps": 1000},
        "SobolStrategy": {
            "n_trials": [sobol_trials],
        },
        "ModelWrapperStrategy": {
            "n_trials": [opt_trials],
            "refit_every": [refit_every],
        },
    }
    all_bench_configs = [bench_rbf, bench_song]

    if funtype == "detection":
        testfun = novel_detection_testfun
        yes_label = "Detected trial"
        no_label = "Nondetected trial"
    elif funtype == "discrimination":
        testfun = novel_discrimination_testfun
        yes_label = "Correct trial"
        no_label = "Incorrect trial"
    else:
        raise RuntimeError("unknown testfun")

    class NovelProblem(LSEProblem, Problem):
        def f(self, x):
            return testfun(x)

    lb = [-1, -1]
    ub = [1, 1]
    benches = []
    problem = NovelProblem(lb, ub, gridsize=50)
    for config in all_bench_configs:
        full_config = copy(config)
        full_config["common"]["lb"] = str(lb)
        full_config["common"]["ub"] = str(ub)
        benches.append(
            Benchmark(
                problem=problem,
                logger=logger,
                configs=full_config,
                global_seed=global_seed,
                n_reps=1,
            )
        )
    combo_bench = combine_benchmarks(*benches)
    strats = []
    for config in combo_bench.combinations:
        strat = combo_bench.run_experiment(config, logger, seed=global_seed, rep=0)
        strats.append(strat)

    titles = [
        "Monotonic RBF Model, LSE (ours)",
        "Nonmonotonic RBF Model, LSE (ours)",
        "Linear-Additive Model, BALD",
    ]
    fig, axes = plt.subplots(2, 2, figsize=(7.5, 6.5))
    plotting_axes = [axes[1, 0], axes[0, 1], axes[0, 0]]
    fig.delaxes(axes[1, 1])
    _ = [
        plot_strat(
            strat=strat_,
            title=title_,
            ax=ax_,
            true_testfun=testfun,
            yes_label=yes_label,
            no_label=no_label,
            show=False,
            include_legend=False,
            include_colorbar=False,
        )
        for ax_, strat_, title_ in zip(plotting_axes, strats, titles)
    ]
    fig.tight_layout()
    handles, labels = axes[1, 0].get_legend_handles_labels()
    fig.legend(handles, labels, loc="lower right", bbox_to_anchor=(0.8, 0.2))
    cbr = fig.colorbar(axes[1, 0].images[0], ax=plotting_axes)
    cbr.set_label("Probability of Detection")

    return fig