def full_benchmark_run(  # Full run, multiple tests.
    problem_groups: Optional[
        Dict[str, Union[List[BenchmarkProblem], List[str]]]
    ] = None,
    method_groups: Optional[
        Dict[str, Union[List[GenerationStrategy], List[str]]]
    ] = None,
    num_trials: Union[int, List[List[int]]] = 20,
    num_replications: int = 20,
    batch_size: Union[int, List[List[int]]] = 1,
    raise_all_exceptions: bool = False,
    benchmark_test: FunctionType = benchmark_test,
    benchmark_replication: FunctionType = benchmark_replication,
    benchmark_trial: FunctionType = benchmark_trial,
    verbose_logging: bool = True,
    # Number of trials that need to fail for a replication to be considered failed.
    failed_trials_tolerated: int = 5,
    # Number of replications that need to fail for a test to be considered failed.
    failed_replications_tolerated: int = 3,
) -> Dict[str, Dict[str, List[Experiment]]]:
    """Full run of the benchmarking suite.

    Benchmarking can be distributed at the level of a test, a replication, or a
    trial (or any combination of those) by passing in a wrapped (in some
    scheduling logic) version of the corresponding function from this module.

    Here, `problem_groups` and `method_groups` are dictionaries with the same
    keys, so that a specific subset of problems can be run against a
    corresponding subset of methods.

    Example:

    ::

        problem_groups = {
            "single_fidelity": [ackley, branin],
            "multi_fidelity": [augmented_hartmann],
        }
        method_groups = {
            "single_fidelity": [single_task_GP_and_NEI_strategy],
            "multi_fidelity": [fixed_noise_MFGP_and_MFKG_strategy],
        }

    Here, `ackley` and `branin` will be run against
    `single_task_GP_and_NEI_strategy`, and `augmented_hartmann` against
    `fixed_noise_MFGP_and_MFKG_strategy`.

    Args:
        problem_groups: Problems to benchmark on, represented as a dictionary
            from category string to a list of BenchmarkProblem-s or string keys
            (must be in standard BOProblems). See the example above.
        method_groups: Methods to benchmark with, represented as a dictionary
            from category string to a list of generation strategies or string
            keys (must be in standard BOMethods). See the example above.
        num_replications: Number of times to run each test (each problem-method
            combination), for an aggregated result.
        num_trials: Number of trials in each test experiment.
        raise_all_exceptions: If True, any encountered exception will be raised.
            Otherwise, failure tolerance thresholds are used: up to
            `failed_trials_tolerated` trials can fail before a replication is
            considered failed, and up to `failed_replications_tolerated`
            replications can fail before a benchmarking test is considered
            failed.
        benchmark_test: Function that runs a single benchmarking test. Defaults
            to `benchmark_test` in this module and must have the same signature.
        benchmark_replication: Function that runs a single benchmarking
            replication. Defaults to `benchmark_replication` in this module and
            must have the same signature.
        benchmark_trial: Function that runs a single trial. Defaults to
            `benchmark_trial` in this module and must have the same signature.
        verbose_logging: Whether logging level should be set to `INFO`.
        failed_trials_tolerated: How many trials can fail before a replication
            is considered failed and aborted. Defaults to 5.
        failed_replications_tolerated: How many replications can fail before a
            test is considered failed and aborted. Defaults to 3.
    """
    problem_groups = problem_groups or {}
    method_groups = method_groups or {}
    _validate_groups(problem_groups, method_groups)
    exceptions = []
    tests: Dict[str, Dict[str, List[Experiment]]] = {}
    for group_name in problem_groups:
        problems, methods = utils.get_problems_and_methods(
            problems=problem_groups.get(group_name),
            methods=method_groups.get(group_name),
        )
        for problem_idx, problem in enumerate(problems):
            tests[problem.name] = {}
            for method_idx, method in enumerate(methods):
                tests[problem.name][method.name] = []
                try:
                    tests[problem.name][method.name] = benchmark_test(
                        problem=problem,
                        method=method,
                        num_replications=num_replications,
                        # For arguments passed as either numbers or matrices,
                        # extract corresponding values for the given combination.
                        num_trials=utils.get_corresponding(
                            num_trials, problem_idx, method_idx
                        ),
                        batch_size=utils.get_corresponding(
                            batch_size, problem_idx, method_idx
                        ),
                        benchmark_replication=benchmark_replication,
                        benchmark_trial=benchmark_trial,
                        raise_all_exceptions=raise_all_exceptions,
                        verbose_logging=verbose_logging,
                        failed_replications_tolerated=failed_replications_tolerated,
                        failed_trials_tolerated=failed_trials_tolerated,
                    )
                except Exception as err:
                    if raise_all_exceptions:
                        raise
                    exceptions.append(err)  # TODO[T53975770]: test
    logger.info(f"Obtained benchmarking test experiments: {tests}")
    return tests
def full_benchmark_run(  # Full run, multiple tests.
    problems: Optional[Union[List[BenchmarkProblem], List[str]]] = None,
    methods: Optional[Union[List[GenerationStrategy], List[str]]] = None,
    num_trials: Union[int, List[List[int]]] = 20,
    num_replications: int = 20,
    batch_size: Union[int, List[List[int]]] = 1,
    raise_all_exceptions: bool = False,
    benchmark_test: FunctionType = benchmark_test,
    benchmark_replication: FunctionType = benchmark_replication,
    benchmark_trial: FunctionType = benchmark_trial,
    verbose_logging: bool = True,
    # Number of trials that need to fail for a replication to be considered failed.
    failed_trials_tolerated: int = 5,
    # Number of replications that need to fail for a test to be considered failed.
    failed_replications_tolerated: int = 3,
) -> Dict[str, Dict[str, List[Experiment]]]:
    """Full run of the benchmarking suite.

    Benchmarking can be distributed at the level of a test, a replication, or a
    trial (or any combination of those) by passing in a wrapped (in some
    scheduling logic) version of the corresponding function from this module.

    Args:
        problems: Problems to benchmark on, represented as BenchmarkProblem-s or
            string keys (must be in standard BOProblems). Defaults to all
            standard BOProblems.
        methods: Methods to benchmark with, represented as generation strategies
            or string keys (must be in standard BOMethods). Defaults to all
            standard BOMethods.
        num_replications: Number of times to run each test (each problem-method
            combination), for an aggregated result.
        num_trials: Number of trials in each test experiment.
        raise_all_exceptions: If True, any encountered exception will be raised.
            Otherwise, failure tolerance thresholds are used: up to
            `failed_trials_tolerated` trials can fail before a replication is
            considered failed, and up to `failed_replications_tolerated`
            replications can fail before a benchmarking test is considered
            failed.
        benchmark_test: Function that runs a single benchmarking test. Defaults
            to `benchmark_test` in this module and must have the same signature.
        benchmark_replication: Function that runs a single benchmarking
            replication. Defaults to `benchmark_replication` in this module and
            must have the same signature.
        benchmark_trial: Function that runs a single trial. Defaults to
            `benchmark_trial` in this module and must have the same signature.
        verbose_logging: Whether logging level should be set to `INFO`.
        failed_trials_tolerated: How many trials can fail before a replication
            is considered failed and aborted. Defaults to 5.
        failed_replications_tolerated: How many replications can fail before a
            test is considered failed and aborted. Defaults to 3.
    """
    exceptions = []
    tests: Dict[str, Dict[str, List[Experiment]]] = {}
    problems, methods = utils.get_problems_and_methods(
        problems=problems, methods=methods
    )
    for problem_idx, problem in enumerate(problems):
        tests[problem.name] = {}
        for method_idx, method in enumerate(methods):
            tests[problem.name][method.name] = []
            try:
                tests[problem.name][method.name] = benchmark_test(
                    problem=problem,
                    method=method,
                    num_replications=num_replications,
                    # For arguments passed as either numbers or matrices,
                    # extract corresponding values for the given combination.
                    num_trials=utils.get_corresponding(
                        num_trials, problem_idx, method_idx
                    ),
                    batch_size=utils.get_corresponding(
                        batch_size, problem_idx, method_idx
                    ),
                    benchmark_replication=benchmark_replication,
                    benchmark_trial=benchmark_trial,
                    raise_all_exceptions=raise_all_exceptions,
                    verbose_logging=verbose_logging,
                    failed_replications_tolerated=failed_replications_tolerated,
                    failed_trials_tolerated=failed_trials_tolerated,
                )
            except Exception as err:
                if raise_all_exceptions:
                    raise
                exceptions.append(err)  # TODO[T53975770]: test
    logger.info(f"Obtained benchmarking test experiments: {tests}")
    return tests
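

# Illustrative helper (a sketch, not part of the original module): drives the
# flat problems/methods variant of `full_benchmark_run` defined above on
# explicit lists and returns the resulting experiments. The numeric arguments
# are arbitrary placeholders; `num_trials` and `batch_size` could instead be
# `List[List[int]]` matrices indexed as [problem_idx][method_idx] via
# `utils.get_corresponding`.
def _example_flat_run(
    problems: List[BenchmarkProblem],
    methods: List[GenerationStrategy],
) -> Dict[str, Dict[str, List[Experiment]]]:
    # Every problem is benchmarked against every method; failures are tolerated
    # up to the given thresholds instead of being raised.
    return full_benchmark_run(
        problems=problems,
        methods=methods,
        num_replications=3,
        num_trials=10,
        batch_size=1,
        failed_trials_tolerated=2,
        failed_replications_tolerated=1,
        verbose_logging=False,
    )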