def benchmark_replication(  # One optimization loop.
    problem: BenchmarkProblem,
    method: GenerationStrategy,
    num_trials: int,
    replication_index: Optional[int] = None,
    batch_size: int = 1,
    raise_all_exceptions: bool = False,
    benchmark_trial: FunctionType = benchmark_trial,
    verbose_logging: bool = True,
    # Number of trials that can fail before a replication is considered failed.
    failed_trials_tolerated: int = 5,
) -> Experiment:
    """Runs one benchmarking replication (equivalent to one optimization loop).

    Args:
        problem: Problem to benchmark on.
        method: Method to benchmark, represented as a generation strategy.
        num_trials: Number of trials in each test experiment.
        replication_index: Optional index of this replication; when provided, it
            seeds the random number generators and is appended to the experiment
            name.
        batch_size: Batch size for this replication, defaults to 1.
        raise_all_exceptions: If set to True, any encountered exception will be
            raised; otherwise, failure tolerance thresholds are used and up to
            `failed_trials_tolerated` trials can fail before a replication is
            considered failed.
        benchmark_trial: Function that runs a single trial. Defaults to
            `benchmark_trial` in this module and must have the same signature.
        verbose_logging: Whether logging level should be set to `INFO`.
        failed_trials_tolerated: How many trials can fail before a replication
            is considered failed and aborted. Defaults to 5.
    """
    # Seed all random number generators for reproducibility; skip seeding when
    # no replication index is provided (`torch.manual_seed` requires an int).
    if replication_index is not None:
        torch.manual_seed(replication_index)
        np.random.seed(replication_index)
        random.seed(replication_index)
    trial_exceptions = []
    experiment_name = f"{method.name}_on_{problem.name}"
    if replication_index is not None:
        experiment_name += f"__v{replication_index}"
    # Make sure the generation strategy starts from the beginning.
    method = method.clone_reset()

    # Choose whether to run replication via Service or Developer API, based on
    # whether the problem was set up using Ax classes like `SearchSpace` and
    # `OptimizationConfig` or using "RESTful" Service API-like constructs like
    # dict parameter representations and `SyntheticFunction`-s or custom
    # callables for the evaluation function.
    replication_runner = (
        _benchmark_replication_Service_API
        if isinstance(problem, SimpleBenchmarkProblem)
        else _benchmark_replication_Dev_API
    )
    experiment, exceptions = replication_runner(
        problem=problem,  # pyre-ignore[6]
        method=method,
        num_trials=num_trials,
        experiment_name=experiment_name,
        batch_size=batch_size,
        raise_all_exceptions=raise_all_exceptions,
        benchmark_trial=benchmark_trial,
        verbose_logging=verbose_logging,
        failed_trials_tolerated=failed_trials_tolerated,
    )
    experiment.fetch_data()
    trial_exceptions.extend(exceptions)
    return experiment
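# A minimal usage sketch of the entry point above, not part of the module.
# `my_problem` and `my_method` are hypothetical stand-ins for a
# `BenchmarkProblem` and a `GenerationStrategy` constructed elsewhere.
experiment = benchmark_replication(
    problem=my_problem,
    method=my_method,
    num_trials=20,
    replication_index=0,  # seeds torch/numpy/random and tags the experiment name
    batch_size=1,
)
# The returned Experiment has already had its data fetched once; its name
# encodes the method/problem pair, e.g. "<method>_on_<problem>__v0".
print(experiment.name)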
def test_clone_reset(self):
    ftgs = GenerationStrategy(
        steps=[
            GenerationStep(model=Models.FACTORIAL, num_arms=1),
            GenerationStep(model=Models.THOMPSON, num_arms=2),
        ]
    )
    # Advance the strategy to its second step, then check that the clone
    # starts over from the first step.
    ftgs._curr = ftgs._steps[1]
    self.assertEqual(ftgs._curr.index, 1)
    self.assertEqual(ftgs.clone_reset()._curr.index, 0)
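# A sketch of the behavior the test above relies on, not the actual Ax
# implementation: a plausible `clone_reset` rebuilds the strategy from its
# steps, discarding all generation state so the copy starts at step 0.
def clone_reset_sketch(strategy: GenerationStrategy) -> GenerationStrategy:
    # Re-instantiating from the same steps resets `_curr` to the first step.
    return GenerationStrategy(name=strategy.name, steps=strategy._steps)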
def run_benchmark_test(
    self,
    setup: BenchmarkSetup,
    generation_strategy: GenerationStrategy,
    num_runs: int = 20,
    raise_all_errors: bool = False,
) -> Dict[Tuple[str, str, int], BenchmarkSetup]:
    """Run a full benchmark test for the given method and problem combination.

    A benchmark test consists of repeated full benchmark runs.

    Args:
        setup: Setup of the runs to execute; includes a benchmarking problem,
            total number of iterations, etc.
        generation_strategy: Generation strategy that defines which generation
            methods should be used in this benchmarking test.
        num_runs: How many benchmark runs of the given problem and method
            combination to execute with the given setup for one benchmark test.
        raise_all_errors: If True, re-raise any exception encountered during a
            run instead of retrying up to `ALLOWED_RUN_RETRIES` times.
    """
    num_failures = 0
    benchmark_runs: Dict[Tuple[str, str, int], BenchmarkSetup] = {}
    logger.info(f"Testing {generation_strategy.name} on {setup.name}:")
    for run_idx in range(num_runs):
        logger.info(f"Run {run_idx}")
        run_key = (setup.name, generation_strategy.name, run_idx)
        # If this run has already been executed, log and skip it.
        if run_key in self._runs:
            self._error_messages.append(  # pragma: no cover
                f"Run {run_idx} of {generation_strategy.name} on {setup.name} "
                "has already been executed in this benchmarking suite. "
                "Check that this method + problem combination is not "
                "included in the benchmarking suite twice. Only the first "
                "run will be recorded."
            )
            continue
        # When the number of failures in this test exceeds the allowed max,
        # we consider the whole run failed.
        while num_failures < ALLOWED_RUN_RETRIES:
            try:
                benchmark_runs[run_key] = self.run_benchmark_run(
                    setup.clone_reset(), generation_strategy.clone_reset()
                )
                self._generator_changes[
                    run_key
                ] = generation_strategy.generator_changes
                break
            except Exception as err:  # pragma: no cover
                if raise_all_errors:
                    raise err
                logger.exception(err)
                num_failures += 1
                self._error_messages.append(f"Error in {run_key}: {err}")

    if num_failures >= ALLOWED_RUN_RETRIES:
        self._error_messages.append(
            f"Considering {generation_strategy.name} on {setup.name} failed"
        )
        self._failed_runs.append((setup.name, generation_strategy.name))
    else:
        self._runs.update(benchmark_runs)
    return self._runs
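# A hypothetical usage sketch of the suite method above; `suite`, `my_setup`,
# and `my_strategy` stand in for a benchmarking-suite instance, a
# BenchmarkSetup, and a GenerationStrategy constructed elsewhere.
runs = suite.run_benchmark_test(
    setup=my_setup,
    generation_strategy=my_strategy,
    num_runs=5,
    raise_all_errors=False,  # retry up to ALLOWED_RUN_RETRIES times instead of raising
)
# Results are keyed by (problem name, method name, run index).
for (problem_name, method_name, run_idx), finished_setup in runs.items():
    print(problem_name, method_name, run_idx)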
def test_equality(self):
    gs1 = GenerationStrategy(
        steps=[
            GenerationStep(model=Models.SOBOL, num_arms=5),
            GenerationStep(model=Models.GPEI, num_arms=-1),
        ]
    )
    gs2 = gs1.clone_reset()
    self.assertEqual(gs1, gs2)
    gs1._data = get_data()
    self.assertNotEqual(gs1, gs2)
    gs2._data = get_data()
    self.assertEqual(gs1, gs2)
    gs1 = gs1.clone_reset()
    gs2 = gs2.clone_reset()
    gs1.gen(experiment=get_branin_experiment())
    self.assertNotEqual(gs1, gs2)
    gs2.gen(experiment=get_branin_experiment())
    # Each generation strategy generated a different arm, so they are not
    # equal even though they have the same setup and generated the same
    # number of arms.
    self.assertNotEqual(gs1, gs2)
def test_equality(self):
    gs1 = GenerationStrategy(
        steps=[
            GenerationStep(model=Models.SOBOL, num_arms=5),
            GenerationStep(model=Models.GPEI, num_arms=-1),
        ]
    )
    gs2 = GenerationStrategy(
        steps=[
            GenerationStep(model=Models.SOBOL, num_arms=5),
            GenerationStep(model=Models.GPEI, num_arms=-1),
        ]
    )
    self.assertEqual(gs1, gs2)

    # `clone_reset()` doesn't clone exactly, so the clone won't be equal to
    # the original strategy.
    gs3 = gs1.clone_reset()
    self.assertNotEqual(gs1, gs3)