Example #1
def benchmark_replication(  # One optimization loop.
    problem: BenchmarkProblem,
    method: GenerationStrategy,
    num_trials: int,
    replication_index: Optional[int] = None,
    batch_size: int = 1,
    raise_all_exceptions: bool = False,
    benchmark_trial: FunctionType = benchmark_trial,
    verbose_logging: bool = True,
    # Number of trials that need to fail for a replication to be considered failed.
    failed_trials_tolerated: int = 5,
) -> Experiment:
    """Runs one benchmarking replication (equivalent to one optimization loop).

    Args:
        problem: Problem to benchmark on.
        method: Method to benchmark, represented as generation strategies.
        num_trials: Number of trials in each test experiment.
        replication_index: Index of this replication; used to seed the random
            number generators and to suffix the experiment name. Defaults to None.
        batch_size: Batch size for this replication, defaults to 1.
        raise_all_exceptions: If set to True, any encountered exception will be
            raised; alternatively, failure-tolerance thresholds are used, and up
            to `failed_trials_tolerated` trials may fail before a replication is
            considered failed.
        benchmark_trial: Function that runs a single trial. Defaults
            to `benchmark_trial` in this module and must have the same signature.
        verbose_logging: Whether logging level should be set to `INFO`.
        failed_trials_tolerated: How many trials can fail before a replication is
            considered failed and aborted. Defaults to 5.
    """
    # Seed RNGs from the replication index, if given, so that replications
    # are reproducible.
    if replication_index is not None:
        torch.manual_seed(replication_index)
        np.random.seed(replication_index)
        random.seed(replication_index)
    trial_exceptions = []
    experiment_name = f"{method.name}_on_{problem.name}"
    if replication_index is not None:
        experiment_name += f"__v{replication_index}"
    # Make sure the generation strategy starts from the beginning.
    method = method.clone_reset()

    # Choose whether to run the replication via the Service or Developer API,
    # based on whether the problem was set up using Ax classes like
    # `SearchSpace` and `OptimizationConfig`, or using "RESTful" Service API-like
    # constructs such as dict parameter representations and `SyntheticFunction`-s
    # or custom callables for the evaluation function.
    replication_runner = (
        _benchmark_replication_Service_API
        if isinstance(problem, SimpleBenchmarkProblem)
        else _benchmark_replication_Dev_API
    )
    experiment, exceptions = replication_runner(
        problem=problem,  # pyre-ignore[6]
        method=method,
        num_trials=num_trials,
        experiment_name=experiment_name,
        batch_size=batch_size,
        raise_all_exceptions=raise_all_exceptions,
        benchmark_trial=benchmark_trial,
        verbose_logging=verbose_logging,
        failed_trials_tolerated=failed_trials_tolerated,
    )
    experiment.fetch_data()
    trial_exceptions.extend(exceptions)
    return experiment
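
A minimal invocation sketch for `benchmark_replication` above. The import paths and the pre-built `my_problem` object are assumptions that may vary between Ax versions; the generation strategy mirrors the ones used in the test examples below.

from ax.modelbridge.generation_strategy import GenerationStep, GenerationStrategy
from ax.modelbridge.registry import Models

# Sobol initialization followed by GP/EI (same setup as in the equality tests).
sobol_gpei = GenerationStrategy(steps=[
    GenerationStep(model=Models.SOBOL, num_arms=5),
    GenerationStep(model=Models.GPEI, num_arms=-1),
])

experiment = benchmark_replication(
    problem=my_problem,  # assumed: a pre-built BenchmarkProblem instance
    method=sobol_gpei,
    num_trials=20,
    replication_index=0,  # seeds the RNGs and suffixes the experiment name
)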
Example #2
    def test_clone_reset(self):
        ftgs = GenerationStrategy(steps=[
            GenerationStep(model=Models.FACTORIAL, num_arms=1),
            GenerationStep(model=Models.THOMPSON, num_arms=2),
        ])
        ftgs._curr = ftgs._steps[1]
        self.assertEqual(ftgs._curr.index, 1)
        self.assertEqual(ftgs.clone_reset()._curr.index, 0)
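
The test above shows that `clone_reset()` moves a strategy back to its first generation step. A hedged sketch of the reuse pattern this enables, mirroring how `benchmark_replication` in Example #1 calls it before each replication; `run_one_loop` is a hypothetical stand-in for the actual runner.

base_strategy = GenerationStrategy(steps=[
    GenerationStep(model=Models.SOBOL, num_arms=5),
    GenerationStep(model=Models.GPEI, num_arms=-1),
])

for replication_index in range(3):
    # Reset state so generation starts again from the Sobol step.
    strategy = base_strategy.clone_reset()
    run_one_loop(strategy, replication_index)  # hypothetical runner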
Example #3
    def run_benchmark_test(
        self,
        setup: BenchmarkSetup,
        generation_strategy: GenerationStrategy,
        num_runs: int = 20,
        raise_all_errors: bool = False,
    ) -> Dict[Tuple[str, str, int], BenchmarkSetup]:
        """Run full benchmark test for the given method and problem combination.
        A benchmark test consists of repeated full benchmark runs.

        Args:
            setup: setup, runs on which to execute; includes
                a benchmarking problem, total number of iterations, etc.
            generation strategy: generation strategy that defines which
                generation methods should be used in this benchmarking test
            num_runs: how many benchmark runs of given problem and method
                combination to run with the given setup for one benchmark test
        """
        num_failures = 0
        benchmark_runs: Dict[Tuple[str, str, int], BenchmarkSetup] = {}
        logger.info(f"Testing {generation_strategy.name} on {setup.name}:")
        for run_idx in range(num_runs):
            logger.info(f"Run {run_idx}")
            run_key = (setup.name, generation_strategy.name, run_idx)
            # If this run has already been executed, log and skip it.
            if run_key in self._runs:
                self._error_messages.append(  # pragma: no cover
                    f"Run {run_idx} of {generation_strategy.name} on {setup.name} "
                    "has already been executed in this benchmarking suite."
                    "Check that this method + problem combination is not "
                    "included in the benchmarking suite twice. Only the first "
                    "run will be recorded.")
                continue

            # When the number of failures in this test exceeds the allowed
            # maximum, we consider the whole run failed.
            while num_failures < ALLOWED_RUN_RETRIES:
                try:
                    benchmark_runs[run_key] = self.run_benchmark_run(
                        setup.clone_reset(), generation_strategy.clone_reset())
                    self._generator_changes[
                        run_key] = generation_strategy.generator_changes
                    break
                except Exception as err:  # pragma: no cover
                    if raise_all_errors:
                        raise err
                    logger.exception(err)
                    num_failures += 1
                    self._error_messages.append(f"Error in {run_key}: {err}")

        if num_failures >= ALLOWED_RUN_RETRIES:
            self._error_messages.append(
                f"Considering {generation_strategy.name} on {setup.name} failed"
            )
            self._failed_runs.append((setup.name, generation_strategy.name))
        else:
            self._runs.update(benchmark_runs)
        return self._runs
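
The retry loop above allows up to `ALLOWED_RUN_RETRIES` failures before a method + problem combination is considered failed. A minimal standalone sketch of that failure-budget pattern, detached from the Ax types (all names here are illustrative, not part of the library):

def run_with_failure_budget(run_once, max_failures):
    """Call `run_once` until it succeeds or `max_failures` attempts have failed."""
    failures = 0
    last_err = None
    while failures < max_failures:
        try:
            return run_once()
        except Exception as err:
            failures += 1
            last_err = err
    raise RuntimeError(f"Giving up after {failures} failed attempts") from last_err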
Example #4
    def test_equality(self):
        gs1 = GenerationStrategy(steps=[
            GenerationStep(model=Models.SOBOL, num_arms=5),
            GenerationStep(model=Models.GPEI, num_arms=-1),
        ])
        gs2 = gs1.clone_reset()
        self.assertEqual(gs1, gs2)
        gs1._data = get_data()
        self.assertNotEqual(gs1, gs2)
        gs2._data = get_data()
        self.assertEqual(gs1, gs2)
        gs1 = gs1.clone_reset()
        gs2 = gs2.clone_reset()
        gs1.gen(experiment=get_branin_experiment())
        self.assertNotEqual(gs1, gs2)
        gs2.gen(experiment=get_branin_experiment())
        # Each generation strategy generated a different arm, so they are not
        # equal even though they have the same setup and generated the same
        # number of arms.
        self.assertNotEqual(gs1, gs2)
Example #5
    def test_equality(self):
        gs1 = GenerationStrategy(steps=[
            GenerationStep(model=Models.SOBOL, num_arms=5),
            GenerationStep(model=Models.GPEI, num_arms=-1),
        ])
        gs2 = GenerationStrategy(steps=[
            GenerationStep(model=Models.SOBOL, num_arms=5),
            GenerationStep(model=Models.GPEI, num_arms=-1),
        ])
        self.assertEqual(gs1, gs2)

        # clone_reset() doesn't produce an exact clone, so they won't be equal.
        gs3 = gs1.clone_reset()
        self.assertNotEqual(gs1, gs3)