Example 1
def full_benchmark_run(  # Full run, multiple tests.
    problem_groups: Optional[
        Dict[str, Union[List[BenchmarkProblem], List[str]]]
    ] = None,
    method_groups: Optional[
        Dict[str, Union[List[GenerationStrategy], List[str]]]
    ] = None,
    num_trials: Union[int, List[List[int]]] = 20,
    num_replications: int = 20,
    batch_size: Union[int, List[List[int]]] = 1,
    raise_all_exceptions: bool = False,
    benchmark_test: FunctionType = benchmark_test,
    benchmark_replication: FunctionType = benchmark_replication,
    benchmark_trial: FunctionType = benchmark_trial,
    verbose_logging: bool = True,
    # Number of trials that need to fail for a replication to be considered failed.
    failed_trials_tolerated: int = 5,
    # Number of replications that need to fail for a test to be considered failed.
    failed_replications_tolerated: int = 3,
) -> Dict[str, Dict[str, List[Experiment]]]:
    """Full run of the benchmarking suite. To make benchmarking distrubuted at
    a level of a test, a replication, or a trial (or any combination of those),
    by passing in a wrapped (in some scheduling logic) version of a corresponding
    function from this module.

    Here, `problem_groups` and `method_groups` are dictionaries that have the same
    keys such that we can run a specific subset of problems with a corresponding
    subset of methods.

    Example:

    ::

        problem_groups = {
            "single_fidelity": [ackley, branin],
            "multi_fidelity": [augmented_hartmann],
        }
        method_groups = {
            "single_fidelity": [single_task_GP_and_NEI_strategy],
            "multi_fidelity": [fixed_noise_MFGP_and_MFKG_strategy],
        }

    Here, `ackley` and `branin` will be run against `single_task_GP_and_NEI_strategy`
    and `augmented_hartmann` against `fixed_noise_MFGP_and_MFKG_strategy`.

    Args:
        problem_groups: Problems to benchmark on, represented as a dictionary from
            category string to a list of `BenchmarkProblem`-s or string keys (must
            be in standard BOProblems). See the example of `problem_groups` above.
        method_groups: Methods to benchmark with, represented as a dictionary from
            category string to a list of generation strategies or string keys (must
            be in standard BOMethods). See the example of `method_groups` above.
        num_replications: Number of times to run each test (each problem-method
            combination), for an aggregated result.
        num_trials: Number of trials in each test experiment. Can also be given
            as a matrix of values indexed by problem and method, to vary the
            number of trials per combination.
        batch_size: Batch size for the trials in each test experiment; like
            `num_trials`, can be a single value or a per-combination matrix.
            Defaults to 1.
        raise_all_exceptions: If set to True, any encountered exception is raised
            immediately. Otherwise, failure tolerance thresholds apply: up to
            `failed_trials_tolerated` trials can fail before a replication is
            considered failed, and up to `failed_replications_tolerated`
            replications can fail before a benchmarking test is considered failed.
        benchmark_test: Function that runs a single benchmarking test. Defaults
            to `benchmark_test` in this module and must have the same signature.
        benchmark_replication: Function that runs a single benchmarking replication.
            Defaults to `benchmark_replication` in this module and must have the
            same signature.
        benchmark_trial: Function that runs a single trial. Defaults
            to `benchmark_trial` in this module and must have the same signature.
        verbose_logging: Whether logging level should be set to `INFO`.
        failed_trials_tolerated: How many trials can fail before a replication is
            considered failed and aborted. Defaults to 5.
        failed_replications_tolerated: How many replications can fail before a
            test is considered failed and aborted. Defaults to 3.
    """
    problem_groups = problem_groups or {}
    method_groups = method_groups or {}
    _validate_groups(problem_groups, method_groups)
    exceptions = []
    tests: Dict[str, Dict[str, List[Experiment]]] = {}
    for group_name in problem_groups:
        problems, methods = utils.get_problems_and_methods(
            problems=problem_groups.get(group_name),
            methods=method_groups.get(group_name),
        )
        for problem_idx, problem in enumerate(problems):
            tests[problem.name] = {}
            for method_idx, method in enumerate(methods):
                tests[problem.name][method.name] = []
                try:
                    tests[problem.name][method.name] = benchmark_test(
                        problem=problem,
                        method=method,
                        num_replications=num_replications,
                        # For arguments passed either as numbers or as matrices,
                        # extract the value for this problem-method combination.
                        num_trials=utils.get_corresponding(
                            num_trials, problem_idx, method_idx),
                        batch_size=utils.get_corresponding(
                            batch_size, problem_idx, method_idx),
                        benchmark_replication=benchmark_replication,
                        benchmark_trial=benchmark_trial,
                        raise_all_exceptions=raise_all_exceptions,
                        verbose_logging=verbose_logging,
                        failed_replications_tolerated=failed_replications_tolerated,
                        failed_trials_tolerated=failed_trials_tolerated,
                    )
                except Exception as err:
                    if raise_all_exceptions:
                        raise
                    exceptions.append(err)  # TODO[T53975770]: test
    logger.info(f"Obtained benchmarking test experiments: {tests}")
    return tests
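
A minimal usage sketch for this grouped variant (not part of the source): it assumes
`full_benchmark_run` is importable from the benchmarking module and that the problems
and generation strategies named in the docstring example above (`ackley`, `branin`,
`augmented_hartmann`, `single_task_GP_and_NEI_strategy`,
`fixed_noise_MFGP_and_MFKG_strategy`) have been constructed elsewhere as
`BenchmarkProblem` and `GenerationStrategy` instances.

# Hypothetical driver script; all problem and strategy objects are assumed to be
# defined elsewhere, mirroring the docstring example above.
tests = full_benchmark_run(
    problem_groups={
        "single_fidelity": [ackley, branin],
        "multi_fidelity": [augmented_hartmann],
    },
    method_groups={
        "single_fidelity": [single_task_GP_and_NEI_strategy],
        "multi_fidelity": [fixed_noise_MFGP_and_MFKG_strategy],
    },
    num_trials=10,
    num_replications=3,
)
# `tests` maps problem name -> method name -> list of benchmark experiments.
for problem_name, by_method in tests.items():
    for method_name, experiments in by_method.items():
        print(problem_name, method_name, len(experiments))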
Example 2
def full_benchmark_run(  # Full run, multiple tests.
    problems: Optional[Union[List[BenchmarkProblem], List[str]]] = None,
    methods: Optional[Union[List[GenerationStrategy], List[str]]] = None,
    num_trials: Union[int, List[List[int]]] = 20,
    num_replications: int = 20,
    batch_size: Union[int, List[List[int]]] = 1,
    raise_all_exceptions: bool = False,
    benchmark_test: FunctionType = benchmark_test,
    benchmark_replication: FunctionType = benchmark_replication,
    benchmark_trial: FunctionType = benchmark_trial,
    verbose_logging: bool = True,
    # Number of trials that need to fail for a replication to be considered failed.
    failed_trials_tolerated: int = 5,
    # Number of replications that need to fail for a test to be considered failed.
    failed_replications_tolerated: int = 3,
) -> Dict[str, Dict[str, List[Experiment]]]:
    """Full run of the benchmarking suite. To make benchmarking distrubuted at
    a level of a test, a replication, or a trial (or any combination of those),
    by passing in a wrapped (in some scheduling logic) version of a corresponding
    function from this module.

    Args:
        problems: Problems to benchmark on, represented as BenchmarkProblem-s
            or string keys (must be in standard BOProblems). Defaults to all
            standard BOProblems.
        methods: Methods to benchmark, represented as generation strategies or
            string keys (must be in standard BOMethods). Defaults to all
            standard BOMethods.
        num_replications: Number of times to run each test (each problem-method
            combination), for an aggregated result.
        num_trials: Number of trials in each test experiment. Can also be given
            as a matrix of values indexed by problem and method, to vary the
            number of trials per combination.
        batch_size: Batch size for the trials in each test experiment; like
            `num_trials`, can be a single value or a per-combination matrix.
            Defaults to 1.
        raise_all_exceptions: If set to True, any encountered exception is raised
            immediately. Otherwise, failure tolerance thresholds apply: up to
            `failed_trials_tolerated` trials can fail before a replication is
            considered failed, and up to `failed_replications_tolerated`
            replications can fail before a benchmarking test is considered failed.
        benchmark_test: Function that runs a single benchmarking test. Defaults
            to `benchmark_test` in this module and must have the same signature.
        benchmark_replication: Function that runs a single benchmarking replication.
            Defaults to `benchmark_replication` in this module and must have the
            same signature.
        benchmark_trial: Function that runs a single trial. Defaults
            to `benchmark_trial` in this module and must have the same signature.
        verbose_logging: Whether logging level should be set to `INFO`.
        failed_trials_tolerated: How many trials can fail before a replication is
            considered failed and aborted. Defaults to 5.
        failed_replications_tolerated: How many replications can fail before a
            test is considered failed and aborted. Defaults to 3.
    """
    exceptions = []
    tests: Dict[str, Dict[str, List[Experiment]]] = {}
    problems, methods = utils.get_problems_and_methods(problems=problems,
                                                       methods=methods)
    for problem_idx, problem in enumerate(problems):
        tests[problem.name] = {}
        for method_idx, method in enumerate(methods):
            tests[problem.name][method.name] = []
            try:
                tests[problem.name][method.name] = benchmark_test(
                    problem=problem,
                    method=method,
                    num_replications=num_replications,
                    # For arguments passed either as numbers or as matrices,
                    # extract the value for this problem-method combination.
                    num_trials=utils.get_corresponding(num_trials, problem_idx,
                                                       method_idx),
                    batch_size=utils.get_corresponding(batch_size, problem_idx,
                                                       method_idx),
                    benchmark_replication=benchmark_replication,
                    benchmark_trial=benchmark_trial,
                    raise_all_exceptions=raise_all_exceptions,
                    verbose_logging=verbose_logging,
                    failed_replications_tolerated=failed_replications_tolerated,
                    failed_trials_tolerated=failed_trials_tolerated,
                )
            except Exception as err:
                if raise_all_exceptions:
                    raise
                exceptions.append(err)  # TODO[T53975770]: test
    logger.info(f"Obtained benchmarking test experiments: {tests}")
    return tests
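
Both variants rely on `utils.get_corresponding` to accept either a single value or a
per-combination matrix for `num_trials` and `batch_size`. Below is a minimal sketch of
the assumed behavior, where a matrix is indexed as `value[problem_idx][method_idx]`;
the real helper lives in the benchmarking `utils` module and may differ.

from typing import List, Union


def get_corresponding(
    value_or_matrix: Union[int, List[List[int]]], row: int, col: int
) -> int:
    """Sketch: return the matrix cell if a matrix was passed, else the scalar."""
    if isinstance(value_or_matrix, list):
        return value_or_matrix[row][col]
    return value_or_matrix


# num_trials=20 applies to every problem-method pair, whereas a matrix such as
# num_trials=[[20, 30]] gives the (problem 0, method 1) pair thirty trials.
assert get_corresponding(20, 0, 1) == 20
assert get_corresponding([[20, 30]], 0, 1) == 30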