Example #1
    def test_bench_smoke(self):

        logger = BenchmarkLogger(log_every=2)
        problem = TestProblem(lb=[0], ub=[1])

        bench = Benchmark(
            problem=problem, logger=logger, configs=self.bench_config, n_reps=2
        )
        bench.run_benchmarks()

        out = bench.logger.pandas()

        # have as many final results as we expect
        self.assertTrue(len(out[out.final]) == bench.num_benchmarks)

        # have as many repetitions as we expect
        self.assertTrue(len(out.rep.unique()) == bench.n_reps)

        # reporting intervals are correct
        self.assertTrue((out[~out.final].trial_id % 2 == 0).all())

        # we don't run extra trials
        total_trials = out.SobolStrategy_n_trials.astype(
            int
        ) + out.ModelWrapperStrategy_n_trials.astype(int)
        self.assertTrue((out.trial_id <= total_trials).all())
Example #2
    def test_add_bench(self):

        logger = BenchmarkLogger(log_every=2)
        problem = TestProblem(lb=[0], ub=[1])

        bench_1 = Benchmark(
            problem=problem,
            logger=logger,
            configs=self.bench_config,
            global_seed=2,
            n_reps=2,
        )
        bench_2 = Benchmark(
            problem=problem,
            logger=logger,
            configs=self.bench_config,
            global_seed=2,
            n_reps=2,
        )

        bench_combined = bench_1 + bench_2
        three_bench = combine_benchmarks(bench_1, bench_2, bench_1)

        self.assertTrue(len(bench_combined.combinations) == 12)
        self.assertTrue(len(three_bench.combinations) == 18)
        self.assertTrue(len(bench_1.combinations) == 6)
        self.assertTrue(len(bench_2.combinations) == 6)
Example #3
 def test_nonmonotonic_single_lse_eval(self):
     config = {
         "common": {
             "outcome_type": "single_probit",
             "strategy_names": "[init_strat, opt_strat]",
             "acqf": "MCLevelSetEstimation",
             "model": "GPClassificationModel",
         },
         "init_strat": {
             "generator": "SobolGenerator",
             "n_trials": 50
         },
         "opt_strat": {
             "generator": "OptimizeAcqfGenerator",
             "n_trials": 1
         },
         "MCLevelSetEstimation": {
             "target": 0.75,
             "beta": 3.98,
         },
         "GPClassificationModel": {
             "inducing_size": 10,
             "mean_covar_factory": "default_mean_covar_factory",
         },
         "OptimizeAcqfGenerator": {
             "restarts": 10,
             "samps": 1000,
         },
     }
     problem = LSETestProblem()
     bench = Benchmark(problems=[problem], configs=config, log_every=100)
     _, strat = bench.run_experiment(problem, bench.combinations[0], 0, 0)
     e = problem.evaluate(strat)
     self.assertTrue(e["mean_square_err_p"] < 0.05)
Example #4
    def test_bench_smoke(self):

        problem1 = TestProblem()
        problem2 = LSETestProblem()

        bench = Benchmark(
            problems=[problem1, problem2],
            configs=self.bench_config,
            n_reps=2,
            log_every=2,
        )
        bench.run_benchmarks()

        out = bench.pandas()

        # assert problem metadata was correctly saved
        self.assertEqual(sorted(out["problem_name"].unique()),
                         ["test lse problem", "test problem"])
        self.assertEqual(
            sorted(out[out["problem_name"] == "test lse problem"]
                   ["problem_threshold"].unique()),
            ["0.75"],
        )

        # assert derived values work correctly
        self.assertEqual(
            sorted(out[out["problem_name"] == "test problem"]
                   ["opt_strat_n_trials"].unique()),
            ["2", "3"],
        )
        self.assertEqual(
            sorted(out[out["problem_name"] == "test lse problem"]
                   ["opt_strat_n_trials"].unique()),
            ["1", "2"],
        )

        # have as many final results as we expect
        self.assertTrue(len(out[out.final]) == bench.num_benchmarks)

        # have as many repetitions as we expect
        self.assertTrue(len(out.rep.unique()) == bench.n_reps)

        # reporting intervals are correct
        self.assertTrue((out[~out.final].trial_id % 2 == 0).all())

        # we don't run extra trials
        total_trials = out.init_strat_n_trials.astype(
            int) + out.opt_strat_n_trials.astype(int)
        self.assertTrue((out.trial_id <= total_trials).all())

        # ensure each simulation has a unique random seed
        self.assertTrue(out[out["final"]]["seed"].is_unique)
Example #5
    def test_monotonic_single_lse_eval(self):
        config = {
            "common": {
                "outcome_type": "single_probit",
                "strategy_names": "[init_strat, opt_strat]",
                "acqf": "MonotonicMCLSE",
                "model": "MonotonicRejectionGP",
            },
            "init_strat": {
                "generator": "SobolGenerator",
                "n_trials": 50
            },
            "opt_strat": {
                "generator": "MonotonicRejectionGenerator",
                "n_trials": 1
            },
            "SobolGenerator": {
                "seed": 1
            },
            "MonotonicMCLSE": {
                "target": 0.75,
                "beta": 3.98,
            },
            "MonotonicRejectionGP": {
                "inducing_size": 10,
                "mean_covar_factory": "monotonic_mean_covar_factory",
                "monotonic_idxs": "[1]",
            },
            "MonotonicRejectionGenerator": {
                "model_gen_options": {
                    "num_restarts": 10,
                    "raw_samples": 1000,
                }
            },
        }
        problem = LSETestProblem()
        bench = Benchmark(problems=[problem], configs=config, log_every=100)
        _, strat = bench.run_experiment(problem, bench.combinations[0], 0, 0)

        e = problem.evaluate(strat)
        self.assertTrue(e["mean_square_err_p"] < 0.05)
Example #6
 def test_monotonic_single_lse_eval(self):
     config = {
         "common": {
             "lb": "[-1, -1]",
             "ub": "[1, 1]",
             "outcome_type": "single_probit",
         },
         "experiment": {
             "acqf": "MonotonicMCLSE",
             "modelbridge_cls": "MonotonicSingleProbitModelbridge",
             "init_strat_cls": "SobolStrategy",
             "opt_strat_cls": "ModelWrapperStrategy",
             "model": "MonotonicRejectionGP",
         },
         "MonotonicMCLSE": {
             "target": 0.75,
             "beta": 3.98,
         },
         "MonotonicRejectionGP": {
             "inducing_size": 10,
             "mean_covar_factory": "monotonic_mean_covar_factory",
         },
         "MonotonicSingleProbitModelbridge": {
             "restarts": 10,
             "samps": 1000,
         },
         "SobolStrategy": {
             "n_trials": 50,
         },
         "ModelWrapperStrategy": {
             "n_trials": 1,
         },
     }
     problem = TestProblem(lb=[-1, -1], ub=[1, 1])
     logger = BenchmarkLogger(log_every=100)
     bench = Benchmark(problem=problem, configs=config, logger=logger)
     strat = bench.run_experiment(bench.combinations[0], logger, 0, 0)
     e = problem.evaluate(strat)
     self.assertTrue(e["mean_square_err_p"] < 0.05)
Example #7
def run_benchmarks_with_checkpoints(
    out_path: str,
    benchmark_name: str,
    problems: List[Problem],
    configs: Mapping[str, Union[str, list]],
    global_seed: Optional[int] = None,
    n_chunks: int = 1,
    n_reps_per_chunk: int = 1,
    log_every: Optional[int] = None,
    checkpoint_every: int = 60,
    n_proc: int = 1,
    serial_debug: bool = False,
) -> None:
    """Runs a series of benchmarks, saving both final and intermediate results to .csv files. Benchmarks are run in
    sequential chunks, each of which runs all combinations of problems/configs/reps in parallel. This function should
    always be used using the "if __name__ == '__main__': ..." idiom.

    Args:
        out_path (str): The path to save the results to.
        benchmark_name (str): A name give to this set of benchmarks. Results will be saved in files named like
            "out_path/benchmark_name_chunk{chunk_number}_out.csv"
        problems (List[Problem]): Problem objects containing the test function to evaluate.
        configs (Mapping[str, Union[str, list]]): Dictionary of configs to run.
            Lists at leaves are used to construct a cartesian product of configurations.
        global_seed (int, optional): Global seed to use for reproducible benchmarks.
            Defaults to randomized seeds.
        n_chunks (int): The number of chunks to break the results into. Each chunk will contain at least 1 run of every
            combination of problem and config.
        n_reps_per_chunk (int, optional): Number of repetitions to run each problem/config in each chunk.
        log_every (int, optional): Logging interval during an experiment. Defaults to only logging at the end.
        checkpoint_every (int): Save intermediate results every checkpoint_every seconds.
        n_proc (int): Number of processors to use.
        serial_debug: debug serially?
    """
    # create the output directory if it does not already exist
    Path(out_path).mkdir(parents=True, exist_ok=True)
    if serial_debug:
        out_fname = Path(f"{out_path}/{benchmark_name}_out.csv")
        print(f"Starting {benchmark_name} benchmark (serial debug mode)...")
        bench = Benchmark(
            problems=problems,
            configs=configs,
            seed=global_seed,
            n_reps=n_reps_per_chunk * n_chunks,
            log_every=log_every,
        )
        bench.run_benchmarks()
        final_results = bench.pandas()
        final_results.to_csv(out_fname)
    else:
        for chunk in range(n_chunks):
            out_fname = Path(
                f"{out_path}/{benchmark_name}_chunk{chunk}_out.csv")

            intermediate_fname = Path(
                f"{out_path}/{benchmark_name}_chunk{chunk}_checkpoint.csv")
            print(f"Starting {benchmark_name} benchmark... chunk {chunk} ")

            bench = PathosBenchmark(
                nproc=n_proc,
                problems=problems,
                configs=configs,
                seed=None,
                n_reps=n_reps_per_chunk,
                log_every=log_every,
            )

            if global_seed is None:
                global_seed = int(np.random.randint(0, 200))
            bench.seed = (
                global_seed + chunk * bench.num_benchmarks
            )  # HACK. TODO: make num_benchmarks a property of bench configs
            bench.start_benchmarks()

            while not bench.is_done:
                time.sleep(checkpoint_every)
                collate_start = time.time()
                print(
                    f"Checkpointing {benchmark_name} chunk {chunk}..., {len(bench.futures)}/{bench.num_benchmarks} alive"
                )
                bench.collate_benchmarks(wait=False)
                temp_results = bench.pandas()
                if len(temp_results) > 0:
                    temp_results["rep"] += n_reps_per_chunk * chunk
                    temp_results.to_csv(intermediate_fname)
                print(
                    f"Collate done in {time.time()-collate_start} seconds, {len(bench.futures)}/{bench.num_benchmarks} left"
                )

            print(f"{benchmark_name} chunk {chunk} fully done!")
            final_results = bench.pandas()
            final_results["rep"] += n_reps_per_chunk * chunk
            final_results.to_csv(out_fname)
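
A minimal usage sketch for run_benchmarks_with_checkpoints (the problem class and config layout are borrowed from the test examples above; the paths and numeric values are illustrative, not prescribed):

if __name__ == "__main__":
    # The __main__ guard is required because PathosBenchmark spawns worker processes.
    problems = [TestProblem(lb=[0], ub=[1])]
    configs = {
        "common": {
            "outcome_type": "single_probit",
            "strategy_names": "[init_strat, opt_strat]",
            "acqf": "MCLevelSetEstimation",
            "model": "GPClassificationModel",
        },
        "init_strat": {"generator": "SobolGenerator", "n_trials": 50},
        # A list at a leaf expands into one configuration per value (cartesian product),
        # so this benchmarks both 10 and 20 opt_strat trials.
        "opt_strat": {"generator": "OptimizeAcqfGenerator", "n_trials": [10, 20]},
        "MCLevelSetEstimation": {"target": 0.75, "beta": 3.98},
        "GPClassificationModel": {
            "inducing_size": 10,
            "mean_covar_factory": "default_mean_covar_factory",
        },
        "OptimizeAcqfGenerator": {"restarts": 10, "samps": 1000},
    }
    run_benchmarks_with_checkpoints(
        out_path="./bench_out",
        benchmark_name="smoke_bench",
        problems=problems,
        configs=configs,
        global_seed=123,
        n_chunks=2,
        n_reps_per_chunk=5,
        log_every=10,
        checkpoint_every=120,
        n_proc=4,
    )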
Example #8
def plot_audiometric_lse_grids(sobol_trials,
                               opt_trials,
                               phenotype="Metabolic+Sensory",
                               beta=2):
    """
    Generates Fig. 8
    """

    logger = BenchmarkLogger(log_every=5)
    bench_rbf = {
        "common": {
            "pairwise": False,
            "target": 0.75
        },
        "experiment": {
            "acqf": "MonotonicMCLSE",
            "modelbridge_cls": "MonotonicSingleProbitModelbridge",
            "init_strat_cls": "SobolStrategy",
            "opt_strat_cls": "ModelWrapperStrategy",
            "model": "MonotonicRejectionGP",
            "parnames": "[context,intensity]",
        },
        "MonotonicMCLSE": {
            "target": 0.75,
            "beta": 3.98,
        },
        "MonotonicRejectionGP": {
            "inducing_size": 100,
            "mean_covar_factory": [
                "monotonic_mean_covar_factory",
            ],
            "monotonic_idxs": ["[1]", "[]"],
            "uniform_idxs": "[]",
        },
        "MonotonicSingleProbitModelbridge": {
            "restarts": 10,
            "samps": 1000
        },
        "SobolStrategy": {
            "n_trials": [sobol_trials],
        },
        "ModelWrapperStrategy": {
            "n_trials": [opt_trials],
            "refit_every": [refit_every],
        },
    }
    bench_song = {
        "common": {
            "pairwise": False,
            "target": 0.75
        },
        "experiment": {
            "acqf": "BernoulliMCMutualInformation",
            "modelbridge_cls": "SingleProbitModelbridgeWithSongHeuristic",
            "init_strat_cls": "SobolStrategy",
            "opt_strat_cls": "ModelWrapperStrategy",
            "model": "GPClassificationModel",
            "parnames": "[context,intensity]",
        },
        "GPClassificationModel": {
            "inducing_size": 100,
            "dim": 2,
            "mean_covar_factory": [
                "song_mean_covar_factory",
            ],
        },
        "SingleProbitModelbridgeWithSongHeuristic": {
            "restarts": 10,
            "samps": 1000
        },
        "SobolStrategy": {
            "n_trials": [sobol_trials],
        },
        "ModelWrapperStrategy": {
            "n_trials": [opt_trials],
            "refit_every": [refit_every],
        },
    }

    all_bench_configs = [bench_rbf, bench_song]

    testfun = make_songetal_testfun(phenotype=phenotype, beta=beta)

    class AudiometricProblem(LSEProblem, Problem):
        def f(self, x):
            return testfun(x)

    lb = [-3, -20]
    ub = [4, 120]
    benches = []
    problem = AudiometricProblem(lb, ub)
    for config in all_bench_configs:
        full_config = copy(config)
        full_config["common"]["lb"] = str(lb)
        full_config["common"]["ub"] = str(ub)
        benches.append(
            Benchmark(
                problem=problem,
                logger=logger,
                configs=full_config,
                global_seed=global_seed,
                n_reps=1,
            ))
    combo_bench = combine_benchmarks(*benches)
    strats = []

    for config in combo_bench.combinations:
        strat = combo_bench.run_experiment(config,
                                           logger,
                                           seed=global_seed,
                                           rep=0)
        strats.append(strat)

    titles = [
        "Monotonic RBF Model, LSE (ours)",
        "Nonmonotonic RBF Model, LSE (ours)",
        "Linear-Additive Model, BALD",
    ]
    fig, axes = plt.subplots(2, 2, figsize=(7.5, 6.5))
    plotting_axes = [axes[1, 0], axes[0, 1], axes[0, 0]]
    fig.delaxes(axes[1, 1])
    _ = [
        plot_strat(strat=strat_,
                   title=title_,
                   ax=ax_,
                   true_testfun=testfun,
                   xlabel="Frequency (kHz)",
                   ylabel="Intensity (dB HL)",
                   flipx=True,
                   logx=True,
                   show=False,
                   include_legend=False,
                   include_colorbar=False)
        for ax_, strat_, title_ in zip(plotting_axes, strats, titles)
    ]
    fig.tight_layout()
    handles, labels = axes[1, 0].get_legend_handles_labels()

    fig.legend(handles, labels, loc="lower right", bbox_to_anchor=(0.8, 0.2))
    cbr = fig.colorbar(axes[1, 0].images[0], ax=plotting_axes)
    cbr.set_label("Probability of Detection")

    return fig
Example #9
def plot_acquisition_examples(sobol_trials, opt_trials, target_level=0.75):
    ### Same model, different acqf figure ####

    configs = {
        "common": {
            "pairwise": False,
            "target": target_level,
            "lb": "[-3]",
            "ub": "[3]",
        },
        "experiment": {
            "acqf": [
                "MonotonicMCPosteriorVariance",
                "MonotonicBernoulliMCMutualInformation",
                "MonotonicMCLSE",
            ],
            "modelbridge_cls":
            "MonotonicSingleProbitModelbridge",
            "init_strat_cls":
            "SobolStrategy",
            "opt_strat_cls":
            "ModelWrapperStrategy",
            "model":
            "MonotonicRejectionGP",
            "parnames":
            "[intensity]",
        },
        "MonotonicMCLSE": {
            "target": target_level,
            "beta": 3.98,
        },
        "MonotonicRejectionGP": {
            "inducing_size": 100,
            "mean_covar_factory": "monotonic_mean_covar_factory",
            "monotonic_idxs": "[0]",
            "uniform_idxs": "[]",
        },
        "MonotonicSingleProbitModelbridge": {
            "restarts": 10,
            "samps": 1000
        },
        "SobolStrategy": {
            "n_trials": sobol_trials
        },
        "ModelWrapperStrategy": {
            "n_trials": opt_trials,
            "refit_every": refit_every,
        },
    }

    def true_testfun(x):
        return norm.cdf(3 * x)

    class SimpleLinearProblem(Problem):
        def f(self, x):
            return norm.ppf(true_testfun(x))

    lb = [-3]
    ub = [3]

    logger = BenchmarkLogger()
    problem = SimpleLinearProblem(lb, ub)
    bench = Benchmark(
        problem=problem,
        logger=logger,
        configs=configs,
        global_seed=global_seed,
        n_reps=1,
    )

    # run each config for just the sobol_trials init trials, taking care to reseed each time
    strats = []
    for c in bench.combinations:
        np.random.seed(global_seed)
        torch.manual_seed(global_seed)
        s = SequentialStrategy.from_config(Config(config_dict=c))
        for _ in range(sobol_trials):
            next_x = s.gen()
            s.add_data(next_x, [problem.sample_y(next_x)])
        strats.append(s)

    # get first gen from all 3
    first_gens = [s.gen() for s in strats]

    fig, ax = plt.subplots(2, 2)
    plot_strat(
        strat=strats[0],
        title=f"First active trial\n (after {sobol_trials} Sobol trials)",
        ax=ax[0, 0],
        true_testfun=true_testfun,
        target_level=target_level,
        show=False,
        include_legend=False)
    samps = [
        norm.cdf(s.sample(torch.Tensor(g), num_samples=10000))
        for s, g in zip(strats, first_gens)
    ]
    predictions = [np.mean(s) for s in samps]
    names = ["First BALV sample", "First BALD sample", "First LSE sample"]
    markers = ["s", "*", "^"]
    for i in range(3):
        ax[0, 0].scatter(
            first_gens[i][0][0],
            predictions[i],
            label=names[i],
            marker=markers[i],
            color="black",
        )

    # now run them all for the full duration
    for s in strats:
        for _tr in range(opt_trials):
            next_x = s.gen()
            s.add_data(next_x, [problem.sample_y(next_x)])

    plotting_axes = [ax[0, 1], ax[1, 0], ax[1, 1]]

    titles = [
        f"Monotonic RBF Model,\n BALV, after {sobol_trials+opt_trials} total trials",
        f"Monotonic RBF Model,\n BALD, after {sobol_trials+opt_trials} total trials",
        f"Monotonic RBF Model,\n LSE (ours) after {sobol_trials+opt_trials} total trials",
    ]

    _ = [
        plot_strat(strat=s,
                   title=t,
                   ax=a,
                   true_testfun=true_testfun,
                   target_level=target_level,
                   show=False,
                   include_legend=False)
        for a, s, t in zip(plotting_axes, strats, titles)
    ]
    fig.tight_layout()
    handles, labels = ax[0, 0].get_legend_handles_labels()
    lgd = fig.legend(handles,
                     labels,
                     loc="lower right",
                     bbox_to_anchor=(1.5, 0.25))
    # return legend so savefig works correctly
    return fig, lgd
Example #10
def plot_novel_lse_grids(sobol_trials, opt_trials, funtype="detection"):
    """
    Generates Fig. TBA
    """

    logger = BenchmarkLogger(
        log_every=opt_trials)  # we only care about final perf
    bench_rbf = {
        "common": {
            "pairwise": False,
            "target": 0.75
        },
        "experiment": {
            "acqf": "MonotonicMCLSE",
            "modelbridge_cls": "MonotonicSingleProbitModelbridge",
            "init_strat_cls": "SobolStrategy",
            "opt_strat_cls": "ModelWrapperStrategy",
            "model": "MonotonicRejectionGP",
            "parnames": "[context,intensity]",
        },
        "MonotonicMCLSE": {
            "target": 0.75,
            "beta": 3.98,
        },
        "MonotonicRejectionGP": {
            "inducing_size": 100,
            "mean_covar_factory": [
                "monotonic_mean_covar_factory",
            ],
            "monotonic_idxs": ["[1]", "[]"],
            "uniform_idxs": "[]",
        },
        "MonotonicSingleProbitModelbridge": {
            "restarts": 10,
            "samps": 1000
        },
        "SobolStrategy": {
            "n_trials": [sobol_trials],
        },
        "ModelWrapperStrategy": {
            "n_trials": [opt_trials],
            "refit_every": [refit_every],
        },
    }
    bench_song = {
        "common": {
            "pairwise": False,
            "target": 0.75
        },
        "experiment": {
            "acqf": "BernoulliMCMutualInformation",
            "modelbridge_cls": "SingleProbitModelbridgeWithSongHeuristic",
            "init_strat_cls": "SobolStrategy",
            "opt_strat_cls": "ModelWrapperStrategy",
            "model": "GPClassificationModel",
            "parnames": "[context,intensity]",
        },
        "GPClassificationModel": {
            "inducing_size": 100,
            "dim": 2,
            "mean_covar_factory": [
                "song_mean_covar_factory",
            ],
        },
        "SingleProbitModelbridgeWithSongHeuristic": {
            "restarts": 10,
            "samps": 1000
        },
        "SobolStrategy": {
            "n_trials": [sobol_trials],
        },
        "ModelWrapperStrategy": {
            "n_trials": [opt_trials],
            "refit_every": [refit_every],
        },
    }
    all_bench_configs = [bench_rbf, bench_song]

    if funtype == "detection":
        testfun = novel_detection_testfun
        yes_label = "Detected trial"
        no_label = "Nondetected trial"
    elif funtype == "discrimination":
        testfun = novel_discrimination_testfun
        yes_label = "Correct trial"
        no_label = "Incorrect trial"
    else:
        raise RuntimeError("unknown testfun")

    class NovelProblem(LSEProblem, Problem):
        def f(self, x):
            return testfun(x)

    lb = [-1, -1]
    ub = [1, 1]
    benches = []
    problem = NovelProblem(lb, ub, gridsize=50)
    for config in all_bench_configs:
        full_config = copy(config)
        full_config["common"]["lb"] = str(lb)
        full_config["common"]["ub"] = str(ub)
        benches.append(
            Benchmark(
                problem=problem,
                logger=logger,
                configs=full_config,
                global_seed=global_seed,
                n_reps=1,
            ))
    combo_bench = combine_benchmarks(*benches)
    strats = []

    for config in combo_bench.combinations:
        strat = combo_bench.run_experiment(config,
                                           logger,
                                           seed=global_seed,
                                           rep=0)
        strats.append(strat)

    titles = [
        "Monotonic RBF Model, LSE (ours)",
        "Nonmonotonic RBF Model, LSE (ours)",
        "Linear-Additive Model, BALD",
    ]
    fig, axes = plt.subplots(2, 2, figsize=(7.5, 6.5))
    plotting_axes = [axes[1, 0], axes[0, 1], axes[0, 0]]
    fig.delaxes(axes[1, 1])
    _ = [
        plot_strat(strat=strat_,
                   title=title_,
                   ax=ax_,
                   true_testfun=testfun,
                   yes_label=yes_label,
                   no_label=no_label,
                   show=False,
                   include_legend=False,
                   include_colorbar=False)
        for ax_, strat_, title_ in zip(plotting_axes, strats, titles)
    ]
    fig.tight_layout()
    handles, labels = axes[1, 0].get_legend_handles_labels()

    fig.legend(handles, labels, loc="lower right", bbox_to_anchor=(0.8, 0.2))
    cbr = fig.colorbar(axes[1, 0].images[0], ax=plotting_axes)
    cbr.set_label("Probability of Detection")

    return fig