Example #1
    def test_get_next_by_random_search_sorted(self,
                                              patch_sample,
                                              patch_ei,
                                              patch_impute):
        values = (10, 1, 9, 2, 8, 3, 7, 4, 6, 5)
        patch_sample.return_value = [ConfigurationMock(i) for i in values]
        patch_ei.return_value = np.array([[_] for _ in values], dtype=float)
        patch_impute.side_effect = lambda l: values
        cs = ConfigurationSpace()
        ei = EI(None)
        rs = RandomSearch(ei, cs)
        rval = rs._maximize(
            runhistory=None, stats=None, num_points=10, _sorted=True
        )
        self.assertEqual(len(rval), 10)
        for i in range(10):
            self.assertIsInstance(rval[i][1], ConfigurationMock)
            self.assertEqual(rval[i][1].value, 10 - i)
            self.assertEqual(rval[i][0], 10 - i)
            self.assertEqual(rval[i][1].origin, 'Random Search (sorted)')

        # Check that config.get_array works as desired and that imputation is
        # used in between; we therefore have to retrieve the value from the mock!
        np.testing.assert_allclose([v.value for v in patch_ei.call_args[0][0]],
                                   np.array(values, dtype=float))
Example #2
 def __init__(
     self,
     acquisition_function: AbstractAcquisitionFunction,
     config_space: ConfigurationSpace,
     rng: Union[bool, np.random.RandomState] = None,
 ):
     super().__init__(acquisition_function, config_space, rng)
     self.random_search = RandomSearch(acquisition_function, config_space,
                                       rng)
     self.local_search = LocalSearch(acquisition_function, config_space,
                                     rng)
     self.max_acq_value = sys.float_info.min
Example #3
    def test_get_next_by_random_search(self, patch):
        def side_effect(size):
            return [ConfigurationMock()] * size

        patch.side_effect = side_effect
        cs = ConfigurationSpace()
        ei = EI(None)
        rs = RandomSearch(ei, cs)
        rval = rs._maximize(
            runhistory=None, stats=None, num_points=10, _sorted=False
        )
        self.assertEqual(len(rval), 10)
        for i in range(10):
            self.assertIsInstance(rval[i][1], ConfigurationMock)
            self.assertEqual(rval[i][1].origin, 'Random Search')
            self.assertEqual(rval[i][0], 0)
Example #4
    def __init__(self,
                 scenario: Scenario,
                 stats: Stats,
                 runhistory: RunHistory,
                 runhistory2epm: AbstractRunHistory2EPM,
                 model: RandomForestWithInstances,
                 acq_optimizer: AcquisitionFunctionMaximizer,
                 acquisition_func: AbstractAcquisitionFunction,
                 rng: np.random.RandomState,
                 restore_incumbent: Configuration = None,
                 random_configuration_chooser: typing.
                 Union[RandomConfigurationChooser] = ChooserNoCoolDown(2.0),
                 predict_x_best: bool = True,
                 min_samples_model: int = 1):
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)
        self.incumbent = restore_incumbent

        self.scenario = scenario
        self.stats = stats
        self.runhistory = runhistory
        self.rh2EPM = runhistory2epm
        self.model = model
        self.acq_optimizer = acq_optimizer
        self.acquisition_func = acquisition_func
        self.rng = rng
        self.random_configuration_chooser = random_configuration_chooser

        self._random_search = RandomSearch(
            acquisition_func,
            self.scenario.cs,  # type: ignore[attr-defined] # noqa F821
            rng,
        )

        self.initial_design_configs = []  # type: typing.List[Configuration]

        self.predict_x_best = predict_x_best

        self.min_samples_model = min_samples_model
        self.currently_considered_budgets = [
            0.0,
        ]
Example #5
    def test_challenger_list_callback(self, patch_sample, patch_ei,
                                      patch_impute):
        values = (10, 1, 9, 2, 8, 3, 7, 4, 6, 5)
        patch_sample.return_value = ConfigurationMock(1)
        patch_ei.return_value = np.array([[_] for _ in values], dtype=float)
        patch_impute.side_effect = lambda l: values
        cs = ConfigurationSpace()
        ei = EI(None)
        rs = RandomSearch(ei, cs)
        rs._maximize = unittest.mock.Mock()
        rs._maximize.return_value = [(0, 0)]

        rval = rs.maximize(
            runhistory=None,
            stats=None,
            num_points=10,
        )
        self.assertEqual(rs._maximize.call_count, 0)
        next(rval)
        self.assertEqual(rs._maximize.call_count, 1)

        random_configuration_chooser = unittest.mock.Mock()
        random_configuration_chooser.check.side_effect = [
            True, False, False, False
        ]
        rs._maximize = unittest.mock.Mock()
        rs._maximize.return_value = [(0, 0), (1, 1)]

        rval = rs.maximize(
            runhistory=None,
            stats=None,
            num_points=10,
            random_configuration_chooser=random_configuration_chooser,
        )
        self.assertEqual(rs._maximize.call_count, 0)
        # The first configuration is chosen at random (see the random_configuration_chooser mock)
        conf = next(rval)
        self.assertIsInstance(conf, ConfigurationMock)
        self.assertEqual(rs._maximize.call_count, 0)
        # The 2nd configuration triggers the call to the callback (see the random_configuration_chooser mock)
        conf = next(rval)
        self.assertEqual(rs._maximize.call_count, 1)
        self.assertEqual(conf, 0)
        # The 3rd configuration doesn't trigger the callback any more
        conf = next(rval)
        self.assertEqual(rs._maximize.call_count, 1)
        self.assertEqual(conf, 1)

        with self.assertRaises(StopIteration):
            next(rval)
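The behavior this test checks (nothing is computed until the challenger list is iterated) is ordinary lazy evaluation; stripped of SMAC specifics, the pattern reduces to roughly the following sketch, which is an illustration only and not SMAC's ChallengerList implementation.

def lazy_challengers(maximize_fn, num_points):
    # maximize_fn runs only once the caller starts iterating.
    yield from maximize_fn(num_points)

calls = []
gen = lazy_challengers(lambda n: calls.append(n) or ["cfg_%d" % i for i in range(n)], 3)
assert calls == []   # nothing has happened yet
first = next(gen)    # maximize_fn is invoked here
assert calls == [3] and first == "cfg_0"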
Example #6
    def __init__(self,
                 scenario: Scenario,
                 stats: Stats,
                 runhistory: RunHistory,
                 runhistory2epm: AbstractRunHistory2EPM,
                 model: RandomForestWithInstances,
                 acq_optimizer: AcquisitionFunctionMaximizer,
                 acquisition_func: AbstractAcquisitionFunction,
                 rng: np.random.RandomState,
                 restore_incumbent: Configuration = None,
                 random_configuration_chooser: typing.
                 Union[RandomConfigurationChooser] = ChooserNoCoolDown(2.0),
                 predict_x_best: bool = True,
                 min_samples_model: int = 1):
        """
        Interface to train the EPM and generate next configurations

        Parameters
        ----------

        scenario: smac.scenario.scenario.Scenario
            Scenario object
        stats: smac.stats.stats.Stats
            statistics object with configuration budgets
        runhistory: smac.runhistory.runhistory.RunHistory
            runhistory with all runs so far
        model: smac.epm.rf_with_instances.RandomForestWithInstances
            empirical performance model (right now, we support only
            RandomForestWithInstances)
        acq_optimizer: smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
            Optimizer of acquisition function.
        restore_incumbent: Configuration
            incumbent to be used from the start. ONLY used to restore states.
        rng: np.random.RandomState
            Random number generator
        random_configuration_chooser:
            Chooser for random configuration -- one of

            * ChooserNoCoolDown(modulus)
            * ChooserLinearCoolDown(start_modulus, modulus_increment, end_modulus)
        predict_x_best: bool
            Choose x_best for computing the acquisition function via the model instead of via the observations.
        min_samples_model: int
            Minimum number of samples to build a model
        """

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)
        self.incumbent = restore_incumbent

        self.scenario = scenario
        self.stats = stats
        self.runhistory = runhistory
        self.rh2EPM = runhistory2epm
        self.model = model
        self.acq_optimizer = acq_optimizer
        self.acquisition_func = acquisition_func
        self.rng = rng
        self.random_configuration_chooser = random_configuration_chooser

        self._random_search = RandomSearch(
            acquisition_func,
            self.scenario.cs,  # type: ignore[attr-defined] # noqa F821
            rng,
        )

        self.initial_design_configs = []  # type: typing.List[Configuration]

        self.predict_x_best = predict_x_best

        self.min_samples_model = min_samples_model
        self.currently_considered_budgets = [
            0.0,
        ]
Example #7
class EPMChooser(object):
    def __init__(self,
                 scenario: Scenario,
                 stats: Stats,
                 runhistory: RunHistory,
                 runhistory2epm: AbstractRunHistory2EPM,
                 model: RandomForestWithInstances,
                 acq_optimizer: AcquisitionFunctionMaximizer,
                 acquisition_func: AbstractAcquisitionFunction,
                 rng: np.random.RandomState,
                 restore_incumbent: Configuration = None,
                 random_configuration_chooser: typing.
                 Union[RandomConfigurationChooser] = ChooserNoCoolDown(2.0),
                 predict_x_best: bool = True,
                 min_samples_model: int = 1):
        """
        Interface to train the EPM and generate next configurations

        Parameters
        ----------

        scenario: smac.scenario.scenario.Scenario
            Scenario object
        stats: smac.stats.stats.Stats
            statistics object with configuration budgets
        runhistory: smac.runhistory.runhistory.RunHistory
            runhistory with all runs so far
        model: smac.epm.rf_with_instances.RandomForestWithInstances
            empirical performance model (right now, we support only
            RandomForestWithInstances)
        acq_optimizer: smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
            Optimizer of acquisition function.
        restore_incumbent: Configuration
            incumbent to be used from the start. ONLY used to restore states.
        rng: np.random.RandomState
            Random number generator
        random_configuration_chooser:
            Chooser for random configuration -- one of

            * ChooserNoCoolDown(modulus)
            * ChooserLinearCoolDown(start_modulus, modulus_increment, end_modulus)
        predict_x_best: bool
            Choose x_best for computing the acquisition function via the model instead of via the observations.
        min_samples_model: int
            Minimum number of samples to build a model
        """

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)
        self.incumbent = restore_incumbent

        self.scenario = scenario
        self.stats = stats
        self.runhistory = runhistory
        self.rh2EPM = runhistory2epm
        self.model = model
        self.acq_optimizer = acq_optimizer
        self.acquisition_func = acquisition_func
        self.rng = rng
        self.random_configuration_chooser = random_configuration_chooser

        self._random_search = RandomSearch(
            acquisition_func,
            self.scenario.cs,  # type: ignore[attr-defined] # noqa F821
            rng,
        )

        self.initial_design_configs = []  # type: typing.List[Configuration]

        self.predict_x_best = predict_x_best

        self.min_samples_model = min_samples_model
        self.currently_considered_budgets = [
            0.0,
        ]

    def _collect_data_to_train_model(
            self) -> typing.Tuple[np.ndarray, np.ndarray, np.ndarray]:
        # if we use a float value as a budget, we want to train the model only on the highest budget
        available_budgets = []
        for run_key in self.runhistory.data.keys():
            available_budgets.append(run_key.budget)

        # Sort available budgets from highest to lowest budget
        available_budgets = sorted(list(set(available_budgets)), reverse=True)

        # Get #points per budget and if there are enough samples, then build a model
        for b in available_budgets:
            X, Y = self.rh2EPM.transform(self.runhistory, budget_subset=[
                b,
            ])
            if X.shape[0] >= self.min_samples_model:
                self.currently_considered_budgets = [
                    b,
                ]
                configs_array = self.rh2EPM.get_configurations(
                    self.runhistory,
                    budget_subset=self.currently_considered_budgets)
                return X, Y, configs_array

        return np.empty(shape=[0, 0]), np.empty(shape=[
            0,
        ]), np.empty(shape=[0, 0])

    def _get_evaluated_configs(self) -> typing.List[Configuration]:
        return self.runhistory.get_all_configs_per_budget(
            budget_subset=self.currently_considered_budgets)

    def choose_next(
            self,
            incumbent_value: float = None) -> typing.Iterator[Configuration]:
        """Choose next candidate solution with Bayesian optimization. The
        suggested configurations depend on the argument ``acq_optimizer`` to
        the ``SMBO`` class.

        Parameters
        ----------
        incumbent_value: float
            Cost value of incumbent configuration (required for acquisition function);
            If not given, it will be inferred from runhistory or predicted;
            if not given and runhistory is empty, it will raise a ValueError.

        Returns
        -------
        Iterator
        """

        self.logger.debug("Search for next configuration")
        X, Y, X_configurations = self._collect_data_to_train_model()

        if X.shape[0] == 0:
            # Only return a single point to avoid an overly high number of
            # random search iterations
            return self._random_search.maximize(runhistory=self.runhistory,
                                                stats=self.stats,
                                                num_points=1)
        self.model.train(X, Y)

        if incumbent_value is not None:
            best_observation = incumbent_value
            x_best_array = None  # type: typing.Optional[np.ndarray]
        else:
            if self.runhistory.empty():
                raise ValueError("Runhistory is empty and the cost value of "
                                 "the incumbent is unknown.")
            x_best_array, best_observation = self._get_x_best(
                self.predict_x_best, X_configurations)

        self.acquisition_func.update(
            model=self.model,
            eta=best_observation,
            incumbent_array=x_best_array,
            num_data=len(self._get_evaluated_configs()),
            X=X_configurations,
        )

        challengers = self.acq_optimizer.maximize(
            runhistory=self.runhistory,
            stats=self.stats,
            num_points=self.scenario.
            acq_opt_challengers,  # type: ignore[attr-defined] # noqa F821
            random_configuration_chooser=self.random_configuration_chooser)
        return challengers

    def _get_x_best(self, predict: bool,
                    X: np.ndarray) -> typing.Tuple[np.ndarray, float]:
        """Get the array representation and cost value of the "best" configuration.

        The definition of best varies depending on the argument ``predict``. If set to ``True``,
        this function will return the stats of the best configuration as predicted by the model,
        otherwise it will return the stats for the best observed configuration.

        Parameters
        ----------
        predict : bool
            Whether to use the predicted or observed best.

        Returns
        -------
        np.ndarray
        float
        """
        if predict:
            costs = list(
                map(
                    lambda x: (
                        self.model.predict_marginalized_over_instances(
                            x.reshape((1, -1)))[0][0][0],
                        x,
                    ),
                    X,
                ))
            costs = sorted(costs, key=lambda t: t[0])
            x_best_array = costs[0][1]
            best_observation = costs[0][0]
            # won't need log(y) if EPM was already trained on log(y)
        else:
            all_configs = self.runhistory.get_all_configs_per_budget(
                budget_subset=self.currently_considered_budgets)
            x_best = self.incumbent
            x_best_array = convert_configurations_to_array(all_configs)
            best_observation = self.runhistory.get_cost(x_best)
            best_observation_as_array = np.array(best_observation).reshape(
                (1, 1))
            # It's unclear how to do this for inv scaling and potential future scaling.
            # This line should be changed if necessary
            best_observation = self.rh2EPM.transform_response_values(
                best_observation_as_array)
            best_observation = best_observation[0][0]

        return x_best_array, best_observation
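The predict branch of _get_x_best pairs every configuration array with its predicted mean cost and keeps the minimum. The selection logic in isolation, with a stubbed predictor in place of the random forest (illustration only, not SMAC's API), reduces to:

import numpy as np

def pick_best_by_prediction(X, predict_cost):
    # X: (N, D) array of configuration vectors; predict_cost maps a (1, D) array to a scalar.
    costs = sorted(((predict_cost(x.reshape(1, -1)), x) for x in X), key=lambda t: t[0])
    best_cost, best_x = costs[0]
    return best_x, best_cost

# Stubbed predictor: the cost is simply the sum of the vector.
best_x, best_cost = pick_best_by_prediction(np.array([[2.0, 8.0], [1.0, 1.0]]),
                                            lambda x: float(x.sum()))
assert best_cost == 2.0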
Example #8
    def __init__(self,
                 scenario: Scenario,
                 stats: Stats,
                 initial_design: InitialDesign,
                 runhistory: RunHistory,
                 runhistory2epm: AbstractRunHistory2EPM,
                 intensifier: Intensifier,
                 aggregate_func: callable,
                 num_run: int,
                 model: RandomForestWithInstances,
                 acq_optimizer: AcquisitionFunctionMaximizer,
                 acquisition_func: AbstractAcquisitionFunction,
                 rng: np.random.RandomState,
                 restore_incumbent: Configuration = None,
                 random_configuration_chooser: typing.Union[
                     ChooserNoCoolDown,
                     ChooserLinearCoolDown] = ChooserNoCoolDown(2.0),
                 predict_incumbent: bool = True):
        """
        Interface that contains the main Bayesian optimization loop

        Parameters
        ----------
        scenario: smac.scenario.scenario.Scenario
            Scenario object
        stats: Stats
            statistics object with configuration budgets
        initial_design: InitialDesign
            initial sampling design
        runhistory: RunHistory
            runhistory with all runs so far
        runhistory2epm : AbstractRunHistory2EPM
            Object that implements the AbstractRunHistory2EPM to convert runhistory
            data into EPM data
        intensifier: Intensifier
            intensification of new challengers against incumbent configuration
            (probably with some kind of racing on the instances)
        aggregate_func: callable
            how to aggregate the runs in the runhistory to get the performance of a
             configuration
        num_run: int
            id of this run (used for pSMAC)
        model: RandomForestWithInstances
            empirical performance model (right now, we support only
            RandomForestWithInstances)
        acq_optimizer: AcquisitionFunctionMaximizer
            Optimizer of acquisition function.
        acquisition_function : AcquisitionFunction
            Object that implements the AbstractAcquisitionFunction (i.e., infill
            criterion for acq_optimizer)
        restore_incumbent: Configuration
            incumbent to be used from the start. ONLY used to restore states.
        rng: np.random.RandomState
            Random number generator
        random_configuration_chooser
            Chooser for random configuration -- one of
            * ChooserNoCoolDown(modulus)
            * ChooserLinearCoolDown(start_modulus, modulus_increment, end_modulus)
        predict_incumbent: bool
            Use predicted performance of incumbent instead of observed performance
        """

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)
        self.incumbent = restore_incumbent

        self.scenario = scenario
        self.config_space = scenario.cs
        self.stats = stats
        self.initial_design = initial_design
        self.runhistory = runhistory
        self.rh2EPM = runhistory2epm
        self.intensifier = intensifier
        self.aggregate_func = aggregate_func
        self.num_run = num_run
        self.model = model
        self.acq_optimizer = acq_optimizer
        self.acquisition_func = acquisition_func
        self.rng = rng
        self.random_configuration_chooser = random_configuration_chooser

        self._random_search = RandomSearch(acquisition_func, self.config_space,
                                           rng)

        self.predict_incumbent = predict_incumbent
Example #9
class SMBO(object):
    """Interface that contains the main Bayesian optimization loop

    Attributes
    ----------
    logger
    incumbent
    scenario
    config_space
    stats
    initial_design
    runhistory
    rh2EPM
    intensifier
    aggregate_func
    num_run
    model
    acq_optimizer
    acquisition_func
    rng
    random_configuration_chooser
    """
    def __init__(self,
                 scenario: Scenario,
                 stats: Stats,
                 initial_design: InitialDesign,
                 runhistory: RunHistory,
                 runhistory2epm: AbstractRunHistory2EPM,
                 intensifier: Intensifier,
                 aggregate_func: callable,
                 num_run: int,
                 model: RandomForestWithInstances,
                 acq_optimizer: AcquisitionFunctionMaximizer,
                 acquisition_func: AbstractAcquisitionFunction,
                 rng: np.random.RandomState,
                 restore_incumbent: Configuration = None,
                 random_configuration_chooser: typing.Union[
                     ChooserNoCoolDown,
                     ChooserLinearCoolDown] = ChooserNoCoolDown(2.0),
                 predict_incumbent: bool = True):
        """
        Interface that contains the main Bayesian optimization loop

        Parameters
        ----------
        scenario: smac.scenario.scenario.Scenario
            Scenario object
        stats: Stats
            statistics object with configuration budgets
        initial_design: InitialDesign
            initial sampling design
        runhistory: RunHistory
            runhistory with all runs so far
        runhistory2epm : AbstractRunHistory2EPM
            Object that implements the AbstractRunHistory2EPM to convert runhistory
            data into EPM data
        intensifier: Intensifier
            intensification of new challengers against incumbent configuration
            (probably with some kind of racing on the instances)
        aggregate_func: callable
            how to aggregate the runs in the runhistory to get the performance of a
             configuration
        num_run: int
            id of this run (used for pSMAC)
        model: RandomForestWithInstances
            empirical performance model (right now, we support only
            RandomForestWithInstances)
        acq_optimizer: AcquisitionFunctionMaximizer
            Optimizer of acquisition function.
        acquisition_function : AcquisitionFunction
            Object that implements the AbstractAcquisitionFunction (i.e., infill
            criterion for acq_optimizer)
        restore_incumbent: Configuration
            incumbent to be used from the start. ONLY used to restore states.
        rng: np.random.RandomState
            Random number generator
        random_configuration_chooser
            Chooser for random configuration -- one of
            * ChooserNoCoolDown(modulus)
            * ChooserLinearCoolDown(start_modulus, modulus_increment, end_modulus)
        predict_incumbent: bool
            Use predicted performance of incumbent instead of observed performance
        """

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)
        self.incumbent = restore_incumbent

        self.scenario = scenario
        self.config_space = scenario.cs
        self.stats = stats
        self.initial_design = initial_design
        self.runhistory = runhistory
        self.rh2EPM = runhistory2epm
        self.intensifier = intensifier
        self.aggregate_func = aggregate_func
        self.num_run = num_run
        self.model = model
        self.acq_optimizer = acq_optimizer
        self.acquisition_func = acquisition_func
        self.rng = rng
        self.random_configuration_chooser = random_configuration_chooser

        self._random_search = RandomSearch(acquisition_func, self.config_space,
                                           rng)

        self.predict_incumbent = predict_incumbent

    def start(self):
        """Starts the Bayesian Optimization loop.
        Detects whether the optimization is restored from a previous state.
        """
        self.stats.start_timing()
        # Initialization, depends on input
        if self.stats.ta_runs == 0 and self.incumbent is None:
            self.incumbent = self.initial_design.run()

        elif self.stats.ta_runs > 0 and self.incumbent is None:
            raise ValueError(
                "According to stats there have been runs performed, "
                "but the optimizer cannot detect an incumbent. Did "
                "you set the incumbent (e.g. after restoring state)?")
        elif self.stats.ta_runs == 0 and self.incumbent is not None:
            raise ValueError(
                "An incumbent is specified, but there are no runs "
                "recorded in the Stats-object. If you're restoring "
                "a state, please provide the Stats-object.")
        else:
            # Restoring state!
            self.logger.info(
                "State Restored! Starting optimization with "
                "incumbent %s", self.incumbent)
            self.logger.info("State restored with following budget:")
            self.stats.print_stats()

        # To be on the safe side -> never return "None" as incumbent
        if not self.incumbent:
            self.incumbent = self.scenario.cs.get_default_configuration()

    def run(self):
        """Runs the Bayesian optimization loop

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.start()

        # Main BO loop
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(time_spent)

            self.logger.debug("Intensify")

            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(self.intensifier._min_time, time_left))

            if self.scenario.shared_model:
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.output_dir_for_this_run,
                    logger=self.logger)

            logging.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
                % (self.stats.get_remaing_time_budget(),
                   self.stats.get_remaining_ta_budget(),
                   self.stats.get_remaining_ta_runs()))

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent

    def choose_next(self,
                    X: np.ndarray,
                    Y: np.ndarray,
                    incumbent_value: float = None):
        """Choose next candidate solution with Bayesian optimization. The
        suggested configurations depend on the argument ``acq_optimizer`` to
        the ``SMBO`` class.

        Parameters
        ----------
        X : (N, D) numpy array
            Each row contains a configuration and one set of
            instance features.
        Y : (N, O) numpy array
            The function values for each configuration instance pair.
        incumbent_value: float
            Cost value of incumbent configuration
            (required for acquisition function);
            if not given, it will be inferred from runhistory;
            if not given and runhistory is empty,
            it will raise a ValueError

        Returns
        -------
        Iterable
        """
        if X.shape[0] == 0:
            # Only return a single point to avoid an overly high number of
            # random search iterations
            return self._random_search.maximize(runhistory=self.runhistory,
                                                stats=self.stats,
                                                num_points=1)

        self.model.train(X, Y)

        if incumbent_value is None:
            if self.runhistory.empty():
                raise ValueError("Runhistory is empty and the cost value of "
                                 "the incumbent is unknown.")
            incumbent_value = self._get_incumbent_value()

        self.acquisition_func.update(model=self.model,
                                     eta=incumbent_value,
                                     num_data=len(self.runhistory.data))

        challengers = self.acq_optimizer.maximize(
            runhistory=self.runhistory,
            stats=self.stats,
            num_points=self.scenario.acq_opt_challengers,
            random_configuration_chooser=self.random_configuration_chooser)
        return challengers

    def _get_incumbent_value(self):
        ''' get incumbent value either from runhistory
            or from best predicted performance on configs in runhistory
            (depends on self.predict_incumbent)

            Return
            ------
            float
        '''
        if self.predict_incumbent:
            configs = convert_configurations_to_array(
                self.runhistory.get_all_configs())
            costs = list(
                map(
                    lambda config: self.model.
                    predict_marginalized_over_instances(config.reshape(
                        (1, -1)))[0][0][0],
                    configs,
                ))
            incumbent_value = np.min(costs)
            # won't need log(y) if EPM was already trained on log(y)

        else:
            if self.runhistory.empty():
                raise ValueError("Runhistory is empty and the cost value of "
                                 "the incumbent is unknown.")
            incumbent_value = self.runhistory.get_cost(self.incumbent)
            # It's unclear how to do this for inv scaling and potential future scaling. This line should be changed if
            # necessary
            incumbent_value_as_array = np.array(incumbent_value).reshape(
                (1, 1))
            incumbent_value = self.rh2EPM.transform_response_values(
                incumbent_value_as_array)
            incumbent_value = incumbent_value[0][0]

        return incumbent_value

    def validate(self,
                 config_mode='inc',
                 instance_mode='train+test',
                 repetitions=1,
                 use_epm=False,
                 n_jobs=-1,
                 backend='threading'):
        """Create validator-object and run validation, using
        scenario-information, runhistory from smbo and tae_runner from intensify

        Parameters
        ----------
        config_mode: str or list<Configuration>
            string or directly a list of Configuration
            str from [def, inc, def+inc, wallclock_time, cpu_time, all]
            time evaluates at cpu- or wallclock-timesteps of:
            [max_time/2^0, max_time/2^1, max_time/2^3, ..., default]
            with max_time being the highest recorded time
        instance_mode: string
            what instances to use for validation, from [train, test, train+test]
        repetitions: int
            number of repetitions in nondeterministic algorithms (in
            deterministic will be fixed to 1)
        use_epm: bool
            whether to use an EPM instead of evaluating all runs with the TAE
        n_jobs: int
            number of parallel processes used by joblib

        Returns
        -------
        runhistory: RunHistory
            runhistory containing all specified runs
        """
        if isinstance(config_mode, str):
            traj_fn = os.path.join(self.scenario.output_dir_for_this_run,
                                   "traj_aclib2.json")
            trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn,
                                                           cs=self.scenario.cs)
        else:
            trajectory = None
        if self.scenario.output_dir_for_this_run:
            new_rh_path = os.path.join(self.scenario.output_dir_for_this_run,
                                       "validated_runhistory.json")
        else:
            new_rh_path = None

        validator = Validator(self.scenario, trajectory, self.rng)
        if use_epm:
            new_rh = validator.validate_epm(config_mode=config_mode,
                                            instance_mode=instance_mode,
                                            repetitions=repetitions,
                                            runhistory=self.runhistory,
                                            output_fn=new_rh_path)
        else:
            new_rh = validator.validate(config_mode,
                                        instance_mode,
                                        repetitions,
                                        n_jobs,
                                        backend,
                                        self.runhistory,
                                        self.intensifier.tae_runner,
                                        output_fn=new_rh_path)
        return new_rh

    def _get_timebound_for_intensification(self, time_spent: float):
        """Calculate time left for intensify from the time spent on
        choosing challengers using the fraction of time intended for
        intensification (which is specified in
        scenario.intensification_percentage).

        Parameters
        ----------
        time_spent : float

        Returns
        -------
        time_left : float
        """
        frac_intensify = self.scenario.intensification_percentage
        if frac_intensify <= 0 or frac_intensify >= 1:
            raise ValueError("The value for intensification_percentage-"
                             "option must lie in (0,1), instead: %.2f" %
                             (frac_intensify))
        total_time = time_spent / (1 - frac_intensify)
        time_left = frac_intensify * total_time
        self.logger.debug("Total time: %.4f, time spent on choosing next "
                          "configurations: %.4f (%.2f), time left for "
                          "intensification: %.4f (%.2f)" %
                          (total_time, time_spent,
                           (1 - frac_intensify), time_left, frac_intensify))
        return time_left

    def _component_builder(self, conf:typing.Union[Configuration, dict]) \
        -> typing.Tuple[AbstractAcquisitionFunction, AbstractEPM]:
        """
            builds new Acquisition function object
            and EPM object and returns these

            Parameters
            ----------
            conf: typing.Union[Configuration, dict]
                configuration specifying "model" and "acq_func"

            Returns
            -------
            typing.Tuple[AbstractAcquisitionFunction, AbstractEPM]

        """
        types, bounds = get_types(
            self.config_space, instance_features=self.scenario.feature_array)

        if conf["model"] == "RF":
            model = RandomForestWithInstances(
                configspace=self.config_space,
                types=types,
                bounds=bounds,
                instance_features=self.scenario.feature_array,
                seed=self.rng.randint(MAXINT),
                pca_components=conf.get("pca_dim", self.scenario.PCA_DIM),
                log_y=conf.get("log_y", self.scenario.transform_y
                               in ["LOG", "LOGS"]),
                num_trees=conf.get("num_trees", self.scenario.rf_num_trees),
                do_bootstrapping=conf.get("do_bootstrapping",
                                          self.scenario.rf_do_bootstrapping),
                ratio_features=conf.get("ratio_features",
                                        self.scenario.rf_ratio_features),
                min_samples_split=conf.get("min_samples_split",
                                           self.scenario.rf_min_samples_split),
                min_samples_leaf=conf.get("min_samples_leaf",
                                          self.scenario.rf_min_samples_leaf),
                max_depth=conf.get("max_depth", self.scenario.rf_max_depth),
            )

        elif conf["model"] == "GP":
            from smac.epm.gp_kernels import ConstantKernel, HammingKernel, WhiteKernel, Matern

            cov_amp = ConstantKernel(
                2.0,
                constant_value_bounds=(np.exp(-10), np.exp(2)),
                prior=LognormalPrior(mean=0.0, sigma=1.0, rng=self.rng),
            )

            cont_dims = np.nonzero(types == 0)[0]
            cat_dims = np.nonzero(types != 0)[0]

            if len(cont_dims) > 0:
                exp_kernel = Matern(
                    np.ones([len(cont_dims)]),
                    [(np.exp(-10), np.exp(2)) for _ in range(len(cont_dims))],
                    nu=2.5,
                    operate_on=cont_dims,
                )

            if len(cat_dims) > 0:
                ham_kernel = HammingKernel(
                    np.ones([len(cat_dims)]),
                    [(np.exp(-10), np.exp(2)) for _ in range(len(cat_dims))],
                    operate_on=cat_dims,
                )
            noise_kernel = WhiteKernel(
                noise_level=1e-8,
                noise_level_bounds=(np.exp(-25), np.exp(2)),
                prior=HorseshoePrior(scale=0.1, rng=self.rng),
            )

            if len(cont_dims) > 0 and len(cat_dims) > 0:
                # both
                kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
            elif len(cont_dims) > 0 and len(cat_dims) == 0:
                # only cont
                kernel = cov_amp * exp_kernel + noise_kernel
            elif len(cont_dims) == 0 and len(cat_dims) > 0:
                # only cat
                kernel = cov_amp * ham_kernel + noise_kernel
            else:
                raise ValueError()

            n_mcmc_walkers = 3 * len(kernel.theta)
            if n_mcmc_walkers % 2 == 1:
                n_mcmc_walkers += 1

            model = GaussianProcessMCMC(
                self.config_space,
                types=types,
                bounds=bounds,
                kernel=kernel,
                n_mcmc_walkers=n_mcmc_walkers,
                chain_length=250,
                burnin_steps=250,
                normalize_y=True,
                seed=self.rng.randint(low=0, high=10000),
            )

        if conf["acq_func"] == "EI":
            acq = EI(model=model, par=conf.get("par_ei", 0))
        elif conf["acq_func"] == "LCB":
            acq = LCB(model=model, par=conf.get("par_lcb", 0))
        elif conf["acq_func"] == "PI":
            acq = PI(model=model, par=conf.get("par_pi", 0))
        elif conf["acq_func"] == "LogEI":
            # par value should be in log-space
            acq = LogEI(model=model, par=conf.get("par_logei", 0))

        return acq, model

    def _get_acm_cs(self):
        """
            returns a configuration space
            designed for querying ~smac.optimizer.smbo._component_builder

            Returns
            -------
                ConfigurationSpace
        """

        cs = ConfigurationSpace()
        cs.seed(self.rng.randint(0, 2**20))

        if 'gp' in smac.extras_installed:
            model = CategoricalHyperparameter("model", choices=("RF", "GP"))
        else:
            model = Constant("model", value="RF")

        num_trees = Constant("num_trees", value=10)
        bootstrap = CategoricalHyperparameter("do_bootstrapping",
                                              choices=(True, False),
                                              default_value=True)
        ratio_features = CategoricalHyperparameter("ratio_features",
                                                   choices=(3 / 6, 4 / 6,
                                                            5 / 6, 1),
                                                   default_value=1)
        min_split = UniformIntegerHyperparameter("min_samples_to_split",
                                                 lower=1,
                                                 upper=10,
                                                 default_value=2)
        min_leaves = UniformIntegerHyperparameter("min_samples_in_leaf",
                                                  lower=1,
                                                  upper=10,
                                                  default_value=1)

        cs.add_hyperparameters([
            model, num_trees, bootstrap, ratio_features, min_split, min_leaves
        ])

        inc_num_trees = InCondition(num_trees, model, ["RF"])
        inc_bootstrap = InCondition(bootstrap, model, ["RF"])
        inc_ratio_features = InCondition(ratio_features, model, ["RF"])
        inc_min_split = InCondition(min_split, model, ["RF"])
        inc_min_leavs = InCondition(min_leaves, model, ["RF"])

        cs.add_conditions([
            inc_num_trees, inc_bootstrap, inc_ratio_features, inc_min_split,
            inc_min_leavs
        ])

        acq = CategoricalHyperparameter("acq_func",
                                        choices=("EI", "LCB", "PI", "LogEI"))
        par_ei = UniformFloatHyperparameter("par_ei", lower=-10, upper=10)
        par_pi = UniformFloatHyperparameter("par_pi", lower=-10, upper=10)
        par_logei = UniformFloatHyperparameter("par_logei",
                                               lower=0.001,
                                               upper=100,
                                               log=True)
        par_lcb = UniformFloatHyperparameter("par_lcb",
                                             lower=0.0001,
                                             upper=0.9999)

        cs.add_hyperparameters([acq, par_ei, par_pi, par_logei, par_lcb])

        inc_par_ei = InCondition(par_ei, acq, ["EI"])
        inc_par_pi = InCondition(par_pi, acq, ["PI"])
        inc_par_logei = InCondition(par_logei, acq, ["LogEI"])
        inc_par_lcb = InCondition(par_lcb, acq, ["LCB"])

        cs.add_conditions([inc_par_ei, inc_par_pi, inc_par_logei, inc_par_lcb])

        return cs
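_get_acm_cs relies on ConfigSpace conditions so that each acquisition function's parameter is only active when the matching "acq_func" value is selected. The same pattern in a minimal, standalone form is shown below; this is a sketch, and import paths can differ between ConfigSpace versions.

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformFloatHyperparameter
from ConfigSpace.conditions import InCondition

cs = ConfigurationSpace()
acq = CategoricalHyperparameter("acq_func", choices=("EI", "LCB"))
par_ei = UniformFloatHyperparameter("par_ei", lower=-10, upper=10)
cs.add_hyperparameters([acq, par_ei])
# par_ei is only active (and only sampled) when acq_func == "EI"
cs.add_conditions([InCondition(par_ei, acq, ["EI"])])
print(cs.sample_configuration())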
Example #10
class SMBO(object):

    """Interface that contains the main Bayesian optimization loop

    Attributes
    ----------
    logger
    incumbent
    scenario
    config_space
    stats
    initial_design
    runhistory
    rh2EPM
    intensifier
    aggregate_func
    num_run
    model
    acq_optimizer
    acquisition_func
    rng
    """

    def __init__(self,
                 scenario: Scenario,
                 stats: Stats,
                 initial_design: InitialDesign,
                 runhistory: RunHistory,
                 runhistory2epm: AbstractRunHistory2EPM,
                 intensifier: Intensifier,
                 aggregate_func: callable,
                 num_run: int,
                 model: RandomForestWithInstances,
                 acq_optimizer: AcquisitionFunctionMaximizer,
                 acquisition_func: AbstractAcquisitionFunction,
                 rng: np.random.RandomState,
                 restore_incumbent: Configuration=None):
        """
        Interface that contains the main Bayesian optimization loop

        Parameters
        ----------
        scenario: smac.scenario.scenario.Scenario
            Scenario object
        stats: Stats
            statistics object with configuration budgets
        initial_design: InitialDesign
            initial sampling design
        runhistory: RunHistory
            runhistory with all runs so far
        runhistory2epm : AbstractRunHistory2EPM
            Object that implements the AbstractRunHistory2EPM to convert runhistory
            data into EPM data
        intensifier: Intensifier
            intensification of new challengers against incumbent configuration
            (probably with some kind of racing on the instances)
        aggregate_func: callable
            how to aggregate the runs in the runhistory to get the performance of a
             configuration
        num_run: int
            id of this run (used for pSMAC)
        model: RandomForestWithInstances
            empirical performance model (right now, we support only
            RandomForestWithInstances)
        acq_optimizer: AcquisitionFunctionMaximizer
            Optimizer of acquisition function.
        acquisition_function : AcquisitionFunction
            Object that implements the AbstractAcquisitionFunction (i.e., infill
            criterion for acq_optimizer)
        restore_incumbent: Configuration
            incumbent to be used from the start. ONLY used to restore states.
        rng: np.random.RandomState
            Random number generator
        """

        self.logger = logging.getLogger(
            self.__module__ + "." + self.__class__.__name__)
        self.incumbent = restore_incumbent

        self.scenario = scenario
        self.config_space = scenario.cs
        self.stats = stats
        self.initial_design = initial_design
        self.runhistory = runhistory
        self.rh2EPM = runhistory2epm
        self.intensifier = intensifier
        self.aggregate_func = aggregate_func
        self.num_run = num_run
        self.model = model
        self.acq_optimizer = acq_optimizer
        self.acquisition_func = acquisition_func
        self.rng = rng

        self._random_search = RandomSearch(
            acquisition_func, self.config_space, rng
        )

    def start(self):
        """Starts the Bayesian Optimization loop.
        Detects whether the optimization is restored from a previous state.
        """
        self.stats.start_timing()
        # Initialization, depends on input
        if self.stats.ta_runs == 0 and self.incumbent is None:
            try:
                self.incumbent = self.initial_design.run()
            except FirstRunCrashedException as err:
                if self.scenario.abort_on_first_run_crash:
                    raise
        elif self.stats.ta_runs > 0 and self.incumbent is None:
            raise ValueError("According to stats there have been runs performed, "
                             "but the optimizer cannot detect an incumbent. Did "
                             "you set the incumbent (e.g. after restoring state)?")
        elif self.stats.ta_runs == 0 and self.incumbent is not None:
            raise ValueError("An incumbent is specified, but there are no runs "
                             "recorded in the Stats-object. If you're restoring "
                             "a state, please provide the Stats-object.")
        else:
            # Restoring state!
            self.logger.info("State Restored! Starting optimization with "
                             "incumbent %s", self.incumbent)
            self.logger.info("State restored with following budget:")
            self.stats.print_stats()

    def run(self):
        """Runs the Bayesian optimization loop

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.start()

        # Main BO loop
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)
            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(time_spent)

            self.logger.debug("Intensify")

            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(self.intensifier._min_time, time_left))

            if self.scenario.shared_model:
                pSMAC.write(run_history=self.runhistory,
                            output_directory=self.scenario.output_dir_for_this_run)

            logging.debug("Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)" % (
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs()))

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent

    def choose_next(self, X: np.ndarray, Y: np.ndarray, incumbent_value: float=None):
        """Choose next candidate solution with Bayesian optimization. The 
        suggested configurations depend on the argument ``acq_optimizer`` to
        the ``SMBO`` class.

        Parameters
        ----------
        X : (N, D) numpy array
            Each row contains a configuration and one set of
            instance features.
        Y : (N, O) numpy array
            The function values for each configuration instance pair.
        incumbent_value: float
            Cost value of incumbent configuration
            (required for acquisition function);
            if not given, it will be inferred from runhistory;
            if not given and runhistory is empty,
            it will raise a ValueError

        Returns
        -------
        Iterable
        """
        if X.shape[0] == 0:
            # Only return a single point to avoid an overly high number of
            # random search iterations
            return self._random_search.maximize(runhistory=self.runhistory, stats=self.stats, num_points=1)

        self.model.train(X, Y)

        if incumbent_value is None:
            if self.runhistory.empty():
                raise ValueError("Runhistory is empty and the cost value of the incumbent is unknown.")
            incumbent_value = self.runhistory.get_cost(self.incumbent)

        self.acquisition_func.update(model=self.model, eta=incumbent_value)

        challengers = self.acq_optimizer.maximize(self.runhistory, self.stats, 5000)
        return challengers

    def validate(self, config_mode='inc', instance_mode='train+test',
                 repetitions=1, use_epm=False, n_jobs=-1, backend='threading'):
        """Create validator-object and run validation, using
        scenario-information, runhistory from smbo and tae_runner from intensify

        Parameters
        ----------
        config_mode: str or list<Configuration>
            string or directly a list of Configuration
            str from [def, inc, def+inc, wallclock_time, cpu_time, all]
            time evaluates at cpu- or wallclock-timesteps of:
            [max_time/2^0, max_time/2^1, max_time/2^3, ..., default]
            with max_time being the highest recorded time
        instance_mode: string
            what instances to use for validation, from [train, test, train+test]
        repetitions: int
            number of repetitions in nondeterministic algorithms (in
            deterministic will be fixed to 1)
        use_epm: bool
            whether to use an EPM instead of evaluating all runs with the TAE
        n_jobs: int
            number of parallel processes used by joblib

        Returns
        -------
        runhistory: RunHistory
            runhistory containing all specified runs
        """
        traj_fn = os.path.join(self.scenario.output_dir_for_this_run, "traj_aclib2.json")
        trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn, cs=self.scenario.cs)
        new_rh_path = os.path.join(self.scenario.output_dir_for_this_run, "validated_runhistory.json")

        validator = Validator(self.scenario, trajectory, self.rng)
        if use_epm:
            new_rh = validator.validate_epm(config_mode=config_mode,
                                            instance_mode=instance_mode,
                                            repetitions=repetitions,
                                            runhistory=self.runhistory,
                                            output=new_rh_path)
        else:
            new_rh = validator.validate(config_mode, instance_mode, repetitions,
                                        n_jobs, backend, self.runhistory,
                                        self.intensifier.tae_runner,
                                        new_rh_path)
        return new_rh

    def _get_timebound_for_intensification(self, time_spent):
        """Calculate time left for intensify from the time spent on
        choosing challengers using the fraction of time intended for
        intensification (which is specified in
        scenario.intensification_percentage).

        Parameters
        ----------
        time_spent : float

        Returns
        -------
        time_left : float
        """
        frac_intensify = self.scenario.intensification_percentage
        if frac_intensify <= 0 or frac_intensify >= 1:
            raise ValueError("The value for intensification_percentage-"
                             "option must lie in (0,1), instead: %.2f" %
                             (frac_intensify))
        total_time = time_spent / (1 - frac_intensify)
        time_left = frac_intensify * total_time
        self.logger.debug("Total time: %.4f, time spent on choosing next "
                          "configurations: %.4f (%.2f), time left for "
                          "intensification: %.4f (%.2f)" %
                          (total_time, time_spent, (1 - frac_intensify), time_left, frac_intensify))
        return time_left
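The formula in _get_timebound_for_intensification treats the time already spent on choosing challengers as the (1 - frac_intensify) share of a full iteration. A small self-contained check with made-up numbers:

def timebound_for_intensification(time_spent, frac_intensify):
    # Mirrors the method above: (1 - frac) of total_time was spent choosing challengers,
    # so intensification gets the remaining frac share.
    if not 0 < frac_intensify < 1:
        raise ValueError("intensification_percentage must lie in (0, 1)")
    total_time = time_spent / (1 - frac_intensify)
    return frac_intensify * total_time

# With frac_intensify = 0.5 and 10s spent, intensification also gets 10s.
assert timebound_for_intensification(10.0, 0.5) == 10.0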
Example #11
class InterleavedLocalAndRandomSearch(AcquisitionFunctionMaximizer):
    """Implements SMAC's default acquisition function optimization.
    
    This optimizer performs local search from the previous best points
    according to the acquisition function, uses the acquisition function to
    sort randomly sampled configurations, and interleaves unsorted, randomly
    sampled configurations in between.
    
    Parameters
    ----------
    acquisition_function : ~smac.optimizer.acquisition.AbstractAcquisitionFunction
        
    config_space : ~smac.configspace.ConfigurationSpace
    
    rng : np.random.RandomState or int, optional
    """
    def __init__(
        self,
        acquisition_function: AbstractAcquisitionFunction,
        config_space: ConfigurationSpace,
        rng: Union[bool, np.random.RandomState] = None,
    ):
        super().__init__(acquisition_function, config_space, rng)
        self.random_search = RandomSearch(acquisition_function, config_space,
                                          rng)
        self.local_search = LocalSearch(acquisition_function, config_space,
                                        rng)
        self.max_acq_value = sys.float_info.min

    def maximize(self, runhistory: RunHistory, stats: Stats, num_points: int,
                 *args) -> Iterable[Configuration]:
        next_configs_by_local_search = self.local_search._maximize(
            runhistory,
            stats,
            10,
        )

        # Get configurations sorted by EI
        next_configs_by_random_search_sorted = self.random_search._maximize(
            runhistory,
            stats,
            num_points - len(next_configs_by_local_search),
            _sorted=True,
        )

        # Having the configurations from random search sorted by their
        # acquisition function value is important for the first few iterations
        # of SMAC. As long as the random forest predicts a constant value, we
        # want to use only random configurations. Having them at the beginning
        # of the list ensures this (even after adding the configurations found
        # by local search and then sorting the combined list).
        next_configs_by_acq_value = (next_configs_by_random_search_sorted +
                                     next_configs_by_local_search)
        next_configs_by_acq_value.sort(reverse=True, key=lambda x: x[0])
        self.logger.debug(
            "First 10 acq func (origin) values of selected configurations: %s",
            str([[_[0], _[1].origin] for _ in next_configs_by_acq_value[:10]]))
        # store the maximum acquisition value of the last expansion (challenger generation)
        self.max_acq_value = next_configs_by_acq_value[0][0]

        next_configs_by_acq_value = [_[1] for _ in next_configs_by_acq_value]

        challengers = ChallengerList(next_configs_by_acq_value,
                                     self.config_space)
        return challengers

    def _maximize(self, runhistory: RunHistory, stats: Stats,
                  num_points: int) -> Iterable[Tuple[float, Configuration]]:
        raise NotImplementedError()
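
For orientation, here is a minimal usage sketch of this optimizer. It assumes an EI acquisition function and a ConfigurationSpace set up as in the earlier examples, and that runhistory and stats objects already exist from an ongoing SMAC run; none of these are constructed here.

# Hedged usage sketch; ei, cs, runhistory and stats are assumed to exist already.
optimizer = InterleavedLocalAndRandomSearch(ei, cs)
challengers = optimizer.maximize(runhistory, stats, num_points=500)
# The returned ChallengerList yields the configurations sorted by acquisition
# value and interleaves freshly sampled random configurations in between.
for config in challengers:
    pass  # hand each challenger to the intensifier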
Ejemplo n.º 12
0
    def __init__(self,
                 scenario: Scenario,
                 tae_runner: ExecuteTARun = None,
                 runhistory: RunHistory = None,
                 intensifier: Intensifier = None,
                 initial_design: InitialDesign = None,
                 initial_configurations: typing.List[Configuration] = None,
                 stats: Stats = None,
                 rng: np.random.RandomState = None,
                 run_id: int = 1):
        """
        Constructor

        Parameters
        ----------
        scenario: smac.scenario.scenario.Scenario
            Scenario object
        tae_runner: smac.tae.execute_ta_run.ExecuteTARun or callable
            Callable or implementation of
            :class:`~smac.tae.execute_ta_run.ExecuteTARun`. In case a
            callable is passed it will be wrapped by
            :class:`~smac.tae.execute_func.ExecuteTAFuncDict`.
            If not set, it will be initialized with the
            :class:`~smac.tae.execute_ta_run_old.ExecuteTARunOld`.
        runhistory: RunHistory
            Runhistory to store all algorithm runs
        intensifier: Intensifier
            intensification object to issue a racing to decide the current incumbent
        initial_design: InitialDesign
            initial sampling design
        initial_configurations: typing.List[Configuration]
            list of initial configurations for initial design --
            cannot be used together with initial_design
        stats: Stats
            optional stats object
        rng: np.random.RandomState
            Random number generator
        run_id: int, (default: 1)
            Run ID will be used as subfolder for output_dir.

        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        # initialize the random number generator
        _, rng = get_rng(rng=rng, logger=self.logger)

        # initial conversion of runhistory into EPM data
        # since ROAR does not really use the converted data,
        # we simply use a cheap RunHistory2EPM here
        num_params = len(scenario.cs.get_hyperparameters())
        runhistory2epm = RunHistory2EPM4Cost(scenario=scenario,
                                             num_params=num_params,
                                             success_states=[
                                                 StatusType.SUCCESS,
                                             ],
                                             impute_censored_data=False,
                                             impute_state=None)

        aggregate_func = average_cost
        # initialize empty runhistory
        if runhistory is None:
            runhistory = RunHistory(aggregate_func=aggregate_func)
        # inject aggr_func if necessary
        if runhistory.aggregate_func is None:
            runhistory.aggregate_func = aggregate_func

        self.stats = Stats(scenario)
        rs = RandomSearch(
            acquisition_function=None,
            config_space=scenario.cs,
        )

        # use SMAC facade
        super().__init__(
            scenario=scenario,
            tae_runner=tae_runner,
            runhistory=runhistory,
            intensifier=intensifier,
            runhistory2epm=runhistory2epm,
            initial_design=initial_design,
            initial_configurations=initial_configurations,
            stats=stats,
            rng=rng,
            run_id=run_id,
            acquisition_function_optimizer=rs,
        )
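
The constructor above follows the pattern of SMAC's ROAR facade: since no model is consulted, a plain RandomSearch is handed to the parent facade as the acquisition function optimizer. Below is a sketch of how such a facade is typically driven; the facade name ROAR, the scenario options and target_function are assumptions based on the standard SMAC API rather than part of this snippet:

# Illustrative sketch, assuming the class above is exposed as ROAR and that
# cs is a ConfigurationSpace and target_function a callable to be optimized.
scenario = Scenario({"run_obj": "quality",     # optimize solution quality
                     "runcount-limit": 50,     # at most 50 target algorithm runs
                     "cs": cs,
                     "deterministic": "true"})
roar = ROAR(scenario=scenario,
            tae_runner=target_function,
            rng=np.random.RandomState(42))
incumbent = roar.optimize()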
Ejemplo n.º 13
0
    def __init__(
        self,
        scenario: Scenario,
        stats: Stats,
        initial_design: InitialDesign,
        runhistory: RunHistory,
        runhistory2epm: AbstractRunHistory2EPM,
        intensifier: Intensifier,
        aggregate_func: callable,
        num_run: int,
        model: AbstractEPM,
        acq_optimizer: AcquisitionFunctionMaximizer,
        acquisition_func: AbstractAcquisitionFunction,
        rng: np.random.RandomState,
        restore_incumbent: Configuration = None,
        # forcibly inject the training and validation sets into the SMBO
        hoag: AbstractHOAG = None,
        # path to the script file of the parameter-server worker
        #server: Server = None,
        bayesian_optimization: bool = False):
        """
        Interface that contains the main Bayesian optimization loop

        Parameters
        ----------
        scenario: smac.scenario.scenario.Scenario
            Scenario object
        stats: Stats
            statistics object with configuration budgets
        initial_design: InitialDesign
            initial sampling design
        runhistory: RunHistory
            runhistory with all runs so far
        runhistory2epm : AbstractRunHistory2EPM
            Object that implements the AbstractRunHistory2EPM to convert runhistory
            data into EPM data
        intensifier: Intensifier
            intensification of new challengers against incumbent configuration
            (probably with some kind of racing on the instances)
        aggregate_func: callable
            how to aggregate the runs in the runhistory to get the performance of a
             configuration
        num_run: int
            id of this run (used for pSMAC)
        model: AbstractEPM
            empirical performance model (right now, we support only
            AbstractEPM)
        acq_optimizer: AcquisitionFunctionMaximizer
            Optimizer of acquisition function.
        acquisition_func : AbstractAcquisitionFunction
            Object that implements the AbstractAcquisitionFunction (i.e., infill
            criterion for acq_optimizer)
        restore_incumbent: Configuration
            incumbent to be used from the start. ONLY used to restore states.
        rng: np.random.RandomState
            Random number generator
        """

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)
        self.incumbent = restore_incumbent

        self.scenario = scenario
        self.config_space = scenario.cs
        self.stats = stats
        self.initial_design = initial_design
        self.runhistory = runhistory
        self.rh2EPM = runhistory2epm
        self.intensifier = intensifier
        self.aggregate_func = aggregate_func
        self.num_run = num_run
        self.model = model
        self.acq_optimizer = acq_optimizer
        self.acquisition_func = acquisition_func
        self.rng = rng
        # the HOAG object; its fit, predict, etc. are used directly
        self.hoag = hoag
        # keep a handle to the server-side process
        #self.server = server
        self.server = None
        self.bayesian_optimization = bayesian_optimization

        self._random_search = RandomSearch(acquisition_func, self.config_space,
                                           rng)
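
Compared with the stock SMBO constructor, this variant adds two switches that are consumed later in the optimization loop: hoag enables gradient-assisted training of the EPM, while bayesian_optimization replaces the EPM with a Gaussian-process step. A minimal sketch of how the flags would be passed follows; MyHOAG and all arguments other than the two flags are placeholders and assume the usual SMAC components have already been built:

# Hypothetical sketch: only the last two keyword arguments differ from a stock
# SMBO constructor call; every other object is assumed to be pre-built.
smbo = SMBO(scenario=scenario, stats=stats, initial_design=initial_design,
            runhistory=runhistory, runhistory2epm=rh2epm, intensifier=intensifier,
            aggregate_func=average_cost, num_run=1, model=model,
            acq_optimizer=acq_optimizer, acquisition_func=acquisition_func,
            rng=np.random.RandomState(1),
            hoag=MyHOAG(),                # gradient provider implementing AbstractHOAG (assumed)
            bayesian_optimization=False)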
Ejemplo n.º 14
0
class SMBO(object):
    """Interface that contains the main Bayesian optimization loop

    Attributes
    ----------
    logger
    incumbent
    scenario
    config_space
    stats
    initial_design
    runhistory
    rh2EPM
    intensifier
    aggregate_func
    num_run
    model
    acq_optimizer
    acquisition_func
    rng
    """
    def __init__(
        self,
        scenario: Scenario,
        stats: Stats,
        initial_design: InitialDesign,
        runhistory: RunHistory,
        runhistory2epm: AbstractRunHistory2EPM,
        intensifier: Intensifier,
        aggregate_func: callable,
        num_run: int,
        model: AbstractEPM,
        acq_optimizer: AcquisitionFunctionMaximizer,
        acquisition_func: AbstractAcquisitionFunction,
        rng: np.random.RandomState,
        restore_incumbent: Configuration = None,
        # forcibly inject the training and validation sets into the SMBO
        hoag: AbstractHOAG = None,
        # path to the script file of the parameter-server worker
        #server: Server = None,
        bayesian_optimization: bool = False):
        """
        Interface that contains the main Bayesian optimization loop

        Parameters
        ----------
        scenario: smac.scenario.scenario.Scenario
            Scenario object
        stats: Stats
            statistics object with configuration budgets
        initial_design: InitialDesign
            initial sampling design
        runhistory: RunHistory
            runhistory with all runs so far
        runhistory2epm : AbstractRunHistory2EPM
            Object that implements the AbstractRunHistory2EPM to convert runhistory
            data into EPM data
        intensifier: Intensifier
            intensification of new challengers against incumbent configuration
            (probably with some kind of racing on the instances)
        aggregate_func: callable
            how to aggregate the runs in the runhistory to get the performance of a
             configuration
        num_run: int
            id of this run (used for pSMAC)
        model: AbstractEPM
            empirical performance model (right now, we support only
            AbstractEPM)
        acq_optimizer: AcquisitionFunctionMaximizer
            Optimizer of acquisition function.
        acquisition_func : AbstractAcquisitionFunction
            Object that implements the AbstractAcquisitionFunction (i.e., infill
            criterion for acq_optimizer)
        restore_incumbent: Configuration
            incumbent to be used from the start. ONLY used to restore states.
        rng: np.random.RandomState
            Random number generator
        """

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)
        self.incumbent = restore_incumbent

        self.scenario = scenario
        self.config_space = scenario.cs
        self.stats = stats
        self.initial_design = initial_design
        self.runhistory = runhistory
        self.rh2EPM = runhistory2epm
        self.intensifier = intensifier
        self.aggregate_func = aggregate_func
        self.num_run = num_run
        self.model = model
        self.acq_optimizer = acq_optimizer
        self.acquisition_func = acquisition_func
        self.rng = rng
        # the HOAG object; its fit, predict, etc. are used directly
        self.hoag = hoag
        # keep a handle to the server-side process
        #self.server = server
        self.server = None
        self.bayesian_optimization = bayesian_optimization

        self._random_search = RandomSearch(acquisition_func, self.config_space,
                                           rng)

    def start(self):
        """Starts the Bayesian Optimization loop.
        Detects whether the optimization is being restored from a previous state.
        """
        self.stats.start_timing()
        # Initialization, depends on input
        if self.stats.ta_runs == 0 and self.incumbent is None:
            try:
                if self.server is None:
                    self.incumbent = self.initial_design.run()
                else:
                    # the workers produce the first incumbent themselves;
                    # the server then receives one of them
                    self.incumbent, new_runhistory = self.server.pull()
                    self.runhistory.update(new_runhistory)
            except FirstRunCrashedException as err:
                if self.scenario.abort_on_first_run_crash:
                    raise
        elif self.stats.ta_runs > 0 and self.incumbent is None:
            raise ValueError(
                "According to stats there have been runs performed, "
                "but the optimizer cannot detect an incumbent. Did "
                "you set the incumbent (e.g. after restoring state)?")
        elif self.stats.ta_runs == 0 and self.incumbent is not None:
            raise ValueError(
                "An incumbent is specified, but there are no runs "
                "recorded in the Stats-object. If you're restoring "
                "a state, please provide the Stats-object.")
        else:
            # Restoring state!
            self.logger.info(
                "State Restored! Starting optimization with "
                "incumbent %s", self.incumbent)
            self.logger.info("State restored with following budget:")
            self.stats.print_stats()

    def run(self):
        """Runs the Bayesian optimization loop

        Returns
        -------
        incumbent: Configuration
            The best found configuration
        """
        self.start()
        # set up an iteration counter
        counter = 0
        # Main BO loop
        while True:
            # print the best result of each SMBO iteration (including the first one, SMBO 0)
            print('SMBO ' + str(counter) + ': ' +
                  str(self.runhistory.get_cost(self.incumbent)))
            counter += 1

            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(time_spent)

            self.logger.debug("Intensify")

            if self.server is None:
                self.incumbent, inc_perf = self.intensifier.intensify(
                    challengers=challengers,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    aggregate_func=self.aggregate_func,
                    time_bound=max(self.intensifier._min_time, time_left))
            else:
                # read the losses from the workers, add them to the history, then run the new challengers
                print(time_left)
                self.server.push(incumbent=self.incumbent,
                                 runhistory=self.runhistory,
                                 challengers=challengers.challengers,
                                 time_left=time_left)
                # read the runhistory from the workers and merge it into self.runhistory
                incumbent, new_runhistory = self.server.pull()
                self.runhistory.update(new_runhistory)
                # after updating the runhistory, check whether a new incumbent exists,
                # because the workers do not have the complete runhistory
                runhistory_old = self.runhistory.get_history_for_config(
                    self.incumbent)
                runhistory_new = self.runhistory.get_history_for_config(
                    incumbent)
                # find the lowest cost recorded for each configuration
                lowest_cost_old = min([cost[0] for cost in runhistory_old])
                lowest_cost_new = min([cost[0] for cost in runhistory_new])
                if lowest_cost_new < lowest_cost_old:
                    # replace with the new incumbent
                    self.incumbent = incumbent
                """可以考虑用这个函数
                new_incumbent = self._compare_configs(
                    incumbent=incumbent, challenger=challenger,
                    run_history=run_history,
                    aggregate_func=aggregate_func,
                    log_traj=log_traj)
                """

            if self.scenario.shared_model:
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.output_dir_for_this_run)

            logging.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
                % (self.stats.get_remaing_time_budget(),
                   self.stats.get_remaining_ta_budget(),
                   self.stats.get_remaining_ta_runs()))

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent

    def choose_next(self,
                    X: np.ndarray,
                    Y: np.ndarray,
                    incumbent_value: float = None):
        """Choose next candidate solution with Bayesian optimization. The 
        suggested configurations depend on the argument ``acq_optimizer`` to
        the ``SMBO`` class.

        Parameters
        ----------
        X : (N, D) numpy array
            Each row contains a configuration and one set of
            instance features.
        Y : (N, O) numpy array
            The function values for each configuration instance pair.
        incumbent_value: float
            Cost value of incumbent configuration
            (required for acquisition function);
            if not given, it will be inferred from runhistory;
            if not given and runhistory is empty,
            it will raise a ValueError

        Returns
        -------
        Iterable
        """
        if X.shape[0] == 0:
            # Only return a single point to avoid an overly high number of
            # random search iterations
            return self._random_search.maximize(runhistory=self.runhistory,
                                                stats=self.stats,
                                                num_points=1)

        # remove rows that are exact duplicates
        X, Y = remove_same_values(X, Y)
        print(X.shape)

        # if a HOAG object was given, use it
        if self.hoag is not None:

            # initialize the gradient array
            gradient = np.zeros(X.shape)
            # compute the gradient for each row of X (a lot of redundant computation here)
            for i in range(X.shape[0]):
                self.hoag.fit(X[i, :])
                gradient[i, :] = self.hoag.predict_gradient()

            X = X.flatten()
            ind = np.argsort(X)
            gradient = gradient.flatten()[ind].reshape(-1, 1)
            X = X[ind].reshape(-1, 1)
            Y = Y.flatten()[ind].reshape(-1, 1)
            self.model.train(X, Y, gradient=gradient)

        elif self.bayesian_optimization:
            # parameters used by the Gaussian process regressor
            gp_params = {"alpha": 1e-5, "n_restarts_optimizer": 2}
            # read the hyperparameter bounds from the configuration space
            pbounds = {}
            for key in self.scenario.cs._hyperparameters.keys():
                # only handle float-valued hyperparameters
                hyperparameter = self.scenario.cs._hyperparameters[key]
                if isinstance(hyperparameter.default_value, float):
                    pbounds[key] = (hyperparameter.lower, hyperparameter.upper)
            # initialize the BayesianOptimization object
            bo = BayesianOptimization(X, Y, pbounds=pbounds, verbose=False)
            # predict the next point at which the expected improvement (EI) is maximal
            newX = bo.maximize(acq="ei", **gp_params)
            # convert the hyperparameter settings back into Configuration objects
            challengers = [Configuration(self.scenario.cs, x) for x in newX]
            return challengers

        else:
            self.model.train(X, Y)
        # print the values of X and Y (debugging)
        # print("X: ", X.flatten())
        # print("Y: ", Y.flatten())
        # print("Y_pred: ", self.model.predict(X))
        # if self.hoag is not None:
        #    print("G: ", gradient)

        if incumbent_value is None:
            if self.runhistory.empty():
                raise ValueError("Runhistory is empty and the cost value of "
                                 "the incumbent is unknown.")
            incumbent_value = self.runhistory.get_cost(self.incumbent)

        self.acquisition_func.update(model=self.model, eta=incumbent_value)

        challengers = self.acq_optimizer.maximize(
            # originally 5000; reduced to 500 for speed
            self.runhistory,
            self.stats,
            500)
        return challengers

    def validate(self,
                 config_mode='inc',
                 instance_mode='train+test',
                 repetitions=1,
                 use_epm=False,
                 n_jobs=-1,
                 backend='threading'):
        """Create validator-object and run validation, using
        scenario-information, runhistory from smbo and tae_runner from intensify

        Parameters
        ----------
        config_mode: str or list<Configuration>
            string or directly a list of Configuration
            str from [def, inc, def+inc, wallclock_time, cpu_time, all]
            time evaluates at cpu- or wallclock-timesteps of:
            [max_time/2^0, max_time/2^1, max_time/2^3, ..., default]
            with max_time being the highest recorded time
        instance_mode: string
            what instances to use for validation, from [train, test, train+test]
        repetitions: int
            number of repetitions in nondeterministic algorithms (in
            deterministic will be fixed to 1)
        use_epm: bool
            whether to use an EPM instead of evaluating all runs with the TAE
        n_jobs: int
            number of parallel processes used by joblib

        Returns
        -------
        runhistory: RunHistory
            runhistory containing all specified runs
        """
        traj_fn = os.path.join(self.scenario.output_dir_for_this_run,
                               "traj_aclib2.json")
        trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn,
                                                       cs=self.scenario.cs)
        new_rh_path = os.path.join(self.scenario.output_dir_for_this_run,
                                   "validated_runhistory.json")

        validator = Validator(self.scenario, trajectory, self.rng)
        if use_epm:
            new_rh = validator.validate_epm(config_mode=config_mode,
                                            instance_mode=instance_mode,
                                            repetitions=repetitions,
                                            runhistory=self.runhistory,
                                            output=new_rh_path)
        else:
            new_rh = validator.validate(config_mode,
                                        instance_mode,
                                        repetitions,
                                        n_jobs,
                                        backend,
                                        self.runhistory,
                                        self.intensifier.tae_runner,
                                        output=new_rh_path)
        return new_rh

    def _get_timebound_for_intensification(self, time_spent):
        """Calculate time left for intensify from the time spent on
        choosing challengers using the fraction of time intended for
        intensification (which is specified in
        scenario.intensification_percentage).

        Parameters
        ----------
        time_spent : float

        Returns
        -------
        time_left : float
        """
        frac_intensify = self.scenario.intensification_percentage
        if frac_intensify <= 0 or frac_intensify >= 1:
            raise ValueError("The value for intensification_percentage-"
                             "option must lie in (0,1), instead: %.2f" %
                             (frac_intensify))
        total_time = time_spent / (1 - frac_intensify)
        time_left = frac_intensify * total_time
        self.logger.debug("Total time: %.4f, time spent on choosing next "
                          "configurations: %.4f (%.2f), time left for "
                          "intensification: %.4f (%.2f)" %
                          (total_time, time_spent,
                           (1 - frac_intensify), time_left, frac_intensify))
        return time_left
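
To round off the example, here is a short driver sketch of how the pieces above fit together; it assumes a fully constructed SMBO instance named smbo whose runhistory already contains at least one run, and simply restates the flow of run(), choose_next() and validate():

# Hedged driver sketch around the SMBO class above (smbo is assumed to exist).
X, Y = smbo.rh2EPM.transform(smbo.runhistory)    # encode all runs for the EPM
challengers = smbo.choose_next(X, Y)             # configurations sorted by acquisition value
first_challenger = next(iter(challengers))       # Configuration handed to the intensifier

# After optimization, re-evaluate the incumbent trajectory with the real TAE:
validated_rh = smbo.validate(config_mode='inc',
                             instance_mode='train+test',
                             repetitions=1,
                             use_epm=False,
                             n_jobs=1)
print(len(validated_rh.data))                    # number of validated runs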