Exemple #1
0
    def get_tuned_config(self, scenario: ASlibScenario):
        """Search self.cs with SMAC3 for a well-performing configuration.

        Parameters
        ----------
        scenario: ASlibScenario
            ASlib scenario at hand; it is bound as keyword argument to
            self.run_cv, which serves as the target function

        Returns
        -------
        Configuration
            incumbent held by the SMBO object after its run
        """
        banner = ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"

        # target function: cross-validation run on the given scenario
        cv_on_scenario = functools.partial(self.run_cv, scenario=scenario)
        taf = ExecuteTAFunc(cv_on_scenario)

        # quality is optimized over self.cs with at most 10 function
        # evaluations
        ac_options = {
            "run_obj": "quality",
            "runcount-limit": 10,
            "cs": self.cs,
            "deterministic": "true",
        }
        ac_scenario = Scenario(ac_options)

        # the stats options related to scenario information need this
        AC_Stats.scenario = ac_scenario

        # announce the optimization
        for line in (banner, "Start Configuration", banner):
            self.logger.info(line)

        smbo = SMBO(scenario=ac_scenario,
                    tae_runner=taf,
                    rng=np.random.RandomState(42))
        smbo.run(max_iters=999)

        AC_Stats.print_stats()
        best = smbo.incumbent
        self.logger.info("Final Incumbent: %s" % best)

        return best
Exemple #2
0
class SMBO(BaseSolver):
    def __init__(self,
                 scenario,
                 tae_runner=None,
                 acquisition_function=None,
                 model=None,
                 runhistory2epm=None,
                 stats=None,
                 rng=None):
        '''
        Interface that contains the main Bayesian optimization loop

        Parameters
        ----------
        scenario: smac.scenario.scenario.Scenario
            Scenario object
        tae_runner: object
            object that implements the following method to call the target
            algorithm (or any other arbitrary function):
            run(self, config)
            If not set, it will be initialized with the tae.ExecuteTARunOld()
        acquisition_function : AcquisitionFunction
            Object that implements the AbstractAcquisitionFunction. Will use
            EI if not set.
        model : object
            Model that implements train() and predict(). Will use a
            RandomForest if not set.
        runhistory2epm : RunHistory2EPM
            Object that implements the AbstractRunHistory2EPM. If None,
            will use RunHistory2EPM4Cost if objective is quality or
            RunHistory2EPM4LogCost if objective is runtime.
        stats: Stats
            optional stats object; a new Stats(scenario) is created only
            if None is passed
        rng: None, int, numpy integer or numpy.random.RandomState
            Random number generator or a seed for one

        Raises
        ------
        TypeError
            if rng is none of the accepted types
        ValueError
            if scenario.run_obj is neither 'quality' nor 'runtime'
        '''

        # Test identity rather than truthiness so that a caller-supplied
        # stats object is never silently replaced.
        if stats is not None:
            self.stats = stats
        else:
            self.stats = Stats(scenario)

        self.runhistory = RunHistory()

        self.logger = logging.getLogger("smbo")

        # self.num_run identifies this run (handed to pSMAC.write in run())
        if rng is None:
            self.num_run = np.random.randint(1234567980)
            self.rng = np.random.RandomState(seed=self.num_run)
        elif isinstance(rng, (int, np.integer)):
            # numpy integer seeds are accepted alongside plain ints
            self.num_run = int(rng)
            self.rng = np.random.RandomState(seed=self.num_run)
        elif isinstance(rng, np.random.RandomState):
            self.num_run = rng.randint(1234567980)
            self.rng = rng
        else:
            raise TypeError('Unknown type %s for argument rng. Only accepts '
                            'None, int or np.random.RandomState' %
                            str(type(rng)))

        self.scenario = scenario
        self.config_space = scenario.cs
        self.traj_logger = TrajLogger(output_dir=self.scenario.output_dir,
                                      stats=self.stats)

        self.types = get_types(self.config_space, scenario.feature_array)
        if model is None:
            self.model = RandomForestWithInstances(
                self.types,
                scenario.feature_array,
                seed=self.rng.randint(1234567980))
        else:
            self.model = model

        if acquisition_function is None:
            self.acquisition_func = EI(self.model)
        else:
            self.acquisition_func = acquisition_function

        self.local_search = LocalSearch(self.acquisition_func,
                                        self.config_space)
        self.incumbent = None

        if tae_runner is None:
            self.executor = ExecuteTARunOld(ta=scenario.ta,
                                            stats=self.stats,
                                            run_obj=scenario.run_obj,
                                            par_factor=scenario.par_factor)
        else:
            self.executor = tae_runner

        self.inten = Intensifier(
            executor=self.executor,
            stats=self.stats,
            traj_logger=self.traj_logger,
            instances=self.scenario.train_insts,
            cutoff=self.scenario.cutoff,
            deterministic=self.scenario.deterministic,
            run_obj_time=self.scenario.run_obj == "runtime",
            instance_specifics=self.scenario.instance_specific)

        num_params = len(self.config_space.get_hyperparameters())

        self.objective = average_cost

        # An unknown objective is rejected even when the caller supplies
        # its own runhistory2epm.
        run_obj = self.scenario.run_obj
        if run_obj != "runtime" and run_obj != 'quality':
            raise ValueError('Unknown run objective: %s. Should be either '
                             'quality or runtime.' % self.scenario.run_obj)

        if runhistory2epm is not None:
            self.rh2EPM = runhistory2epm
        elif run_obj == "runtime":
            # if we log the performance data,
            # the RFRImputator will already get
            # log transform data from the runhistory
            cutoff = np.log10(self.scenario.cutoff)
            threshold = np.log10(self.scenario.cutoff *
                                 self.scenario.par_factor)

            imputor = RFRImputator(cs=self.config_space,
                                   rs=self.rng,
                                   cutoff=cutoff,
                                   threshold=threshold,
                                   model=self.model,
                                   change_threshold=0.01,
                                   max_iter=10)
            self.rh2EPM = RunHistory2EPM4LogCost(
                scenario=self.scenario,
                num_params=num_params,
                success_states=[StatusType.SUCCESS, ],
                impute_censored_data=True,
                impute_state=[StatusType.TIMEOUT, ],
                imputor=imputor)
        else:
            self.rh2EPM = RunHistory2EPM4Cost(
                scenario=self.scenario,
                num_params=num_params,
                success_states=[StatusType.SUCCESS, ],
                impute_censored_data=False,
                impute_state=None)

    def run_initial_design(self):
        '''
            runs algorithm runs for a initial design;
            default implementation: running the default configuration on
                                    a random instance-seed pair
            Side effects: sets self.incumbent, adds the run to
            self.runhistory, increments self.stats.inc_changed and writes
            two entries to the trajectory logger. Terminates the process
            via sys.exit(1) if the first run crashed or was aborted.

            Returns
            -------
            incumbent: Configuration()
                initial incumbent configuration
        '''

        default_conf = self.config_space.get_default_configuration()
        self.incumbent = default_conf

        # add this incumbent right away to have an entry to time point 0
        self.traj_logger.add_entry(train_perf=2**31,
                                   incumbent_id=1,
                                   incumbent=self.incumbent)

        rand_inst_id = self.rng.randint(0, len(self.scenario.train_insts))
        # ignore instance specific values
        rand_inst = self.scenario.train_insts[rand_inst_id]

        if self.scenario.deterministic:
            initial_seed = 0
        else:
            # NOTE(review): this seed comes from the global `random` module,
            # not from self.rng, so it is not controlled by the rng given
            # to the constructor -- confirm whether that is intended.
            initial_seed = random.randint(0, MAXINT)

        status, cost, runtime, additional_info = self.executor.start(
            default_conf,
            instance=rand_inst,
            cutoff=self.scenario.cutoff,
            seed=initial_seed,
            instance_specific=self.scenario.instance_specific.get(
                rand_inst, "0"))

        # Both statuses have to be listed explicitly: the former
        # `[CRASHED or ABORT]` evaluated to a one-element list, so one of
        # the two was never checked.
        if status in (StatusType.CRASHED, StatusType.ABORT):
            self.logger.critical("First run crashed -- Abort")
            sys.exit(1)

        self.runhistory.add(config=default_conf,
                            cost=cost,
                            time=runtime,
                            status=status,
                            instance_id=rand_inst,
                            seed=initial_seed,
                            additional_info=additional_info)
        default_inst_seeds = set(
            self.runhistory.get_runs_for_config(default_conf))
        default_perf = self.objective(default_conf, self.runhistory,
                                      default_inst_seeds)
        self.runhistory.update_cost(default_conf, default_perf)

        self.stats.inc_changed += 1  # first incumbent

        self.traj_logger.add_entry(train_perf=default_perf,
                                   incumbent_id=self.stats.inc_changed,
                                   incumbent=self.incumbent)

        return default_conf

    def run(self, max_iters=10):
        '''
        Runs the Bayesian optimization loop for max_iters iterations

        After the initial design, every iteration transforms the run
        history into training data, asks choose_next() for challengers and
        hands them to the intensifier together with the current incumbent.
        The loop also ends as soon as the stats object reports an
        exhausted budget.

        Parameters
        ----------
        max_iters: int
            The maximum number of iterations. The iteration counter starts
            at 1 and is compared for equality, so a value below 1 never
            stops the loop; only an exhausted budget does.

        Returns
        ----------
        incumbent: Configuration
            The best found configuration
        '''
        self.stats.start_timing()

        self.incumbent = self.run_initial_design()

        # Main BO loop
        iteration = 1
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_directory=self.scenario.output_dir,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            time_spend = time.time() - start_time
            # use the instance logger like the rest of the class instead of
            # the root logger
            self.logger.debug(
                "Time spend to choose next configurations: %.2f sec",
                time_spend)

            self.logger.debug("Intensify")

            # the time bound equals the time spent on choosing the
            # challengers, floored at 0.01; the returned incumbent
            # performance is not needed here
            self.incumbent, _ = self.inten.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                objective=self.objective,
                time_bound=max(0.01, time_spend))

            # TODO: Write run history into database
            if self.scenario.shared_model:
                pSMAC.write(run_history=self.runhistory,
                            output_directory=self.scenario.output_dir,
                            num_run=self.num_run)

            if iteration == max_iters:
                break

            iteration += 1

            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), "
                "%f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent

    def choose_next(self,
                    X,
                    Y,
                    num_interleaved_random=1010,
                    num_configurations_by_random_search_sorted=1000,
                    num_configurations_by_local_search=10):
        """Train the model and propose challengers for the next iteration.

        Parameters
        ----------
        X : (N, D) numpy array
            Each row contains a configuration and one set of
            instance features.
        Y : (N, O) numpy array
            The function values for each configuration instance pair.
        num_interleaved_random : int
            Number of unsorted random configurations that are interleaved
            with the configurations ranked by acquisition value.
        num_configurations_by_random_search_sorted : int
            Number of random configurations that are scored by the
            acquisition function.
        num_configurations_by_local_search : int
            Number of local searches to start.

        Returns
        -------
        list
            Configurations to evaluate, alternating between one ranked by
            acquisition value and one random one; the list ends with the
            shorter of both sources (2020 entries with the defaults).
        """
        self.model.train(X, Y)

        # eta stays at 0.0 unless there are runs and an incumbent
        # TODO try to calculate an incumbent from the runhistory!
        eta = 0.0
        if not self.runhistory.empty() and self.incumbent is not None:
            eta = self.runhistory.get_cost(self.incumbent)

        self.acquisition_func.update(model=self.model, eta=eta)

        # unsorted random configurations; their dummy acquisition value
        # is dropped
        interleaved = [
            config for _, config in self._get_next_by_random_search(
                num_points=num_interleaved_random)
        ]

        # candidates carrying a real acquisition value, best first
        scored_random = self._get_next_by_random_search(
            num_configurations_by_random_search_sorted, _sorted=True)
        scored_local = self._get_next_by_local_search(
            num_configurations_by_local_search)
        ranked = sorted(scored_random + scored_local,
                        key=lambda pair: pair[0],
                        reverse=True)

        top_values = [pair[0] for pair in ranked[:10]]
        self.logger.debug(
            "First 10 acq func values of selected configurations: %s" %
            str(top_values))

        ranked_configs = [pair[1] for pair in ranked]

        # alternate: ranked, random, ranked, random, ...
        return [
            config
            for couple in zip(ranked_configs, interleaved)
            for config in couple
        ]

    def _get_next_by_random_search(self, num_points=1000, _sorted=False):
        """Get candidate solutions via random search.

        Parameters
        ----------
        num_points : int, optional (default=1000)
            Number of configurations to sample.

        _sorted : bool, optional (default=False)
            Whether to score the candidate solutions with the acquisition
            function and sort them by that value (best first, ties broken
            randomly). If False, the sampling order is kept and every
            candidate gets the dummy value 0.

        Returns
        -------
        list : (acquisition value, Candidate solutions)
        """

        rand_configs = self.config_space.sample_configuration(size=num_points)
        # NOTE(review): sample_configuration() without a size is used as a
        # single configuration elsewhere in this class; presumably some
        # sizes also yield a bare configuration instead of a list -- wrap
        # it so the code below can always iterate.
        if not isinstance(rand_configs, list):
            rand_configs = [rand_configs]

        if not _sorted:
            for config in rand_configs:
                config.origin = 'Random Search'
            return [(0, config) for config in rand_configs]

        imputed_rand_configs = np.array(
            [
                ConfigSpace.util.impute_inactive_values(config).get_array()
                for config in rand_configs
            ],
            dtype=np.float64)
        acq_values = self.acquisition_func(imputed_rand_configs)

        # Random secondary key so that equal acquisition values are ordered
        # randomly, see
        # http://stackoverflow.com/questions/20197990/how-to-make-argsort-result-to-be-random-between-equal-values
        tie_breaker = self.rng.rand(len(acq_values))
        # Last column is primary sort key!
        indices = np.lexsort((tie_breaker.flatten(), acq_values.flatten()))

        for config in rand_configs:
            config.origin = 'Random Search (sorted)'

        # Cannot use zip here because the indices array cannot index the
        # rand_configs list, because the second is a pure python list
        return [(acq_values[ind][0], rand_configs[ind])
                for ind in indices[::-1]]

    def _get_next_by_local_search(self, num_points=10):
        """Get candidate solutions via local search.

        The first local search starts at the current incumbent (if there is
        one), all others at randomly sampled configurations.
        In case acquisition function values tie, these will be broken randomly.

        Parameters
        ----------
        num_points : int, optional (default=10)
            Number of local searches and returned values.

        Returns
        -------
        list : (acquisition value, Candidate solutions),
               ordered by their acquisition function value
        """
        configs_acq = []

        # Start N local search from different random start points
        for i in range(num_points):
            if i == 0 and self.incumbent is not None:
                start_point = self.incumbent
            else:
                start_point = self.config_space.sample_configuration()

            configuration, acq_val = self.local_search.maximize(start_point)

            configuration.origin = 'Local Search'
            configs_acq.append((acq_val[0][0], configuration))

        # shuffle for random tie-break; the optional second argument of
        # random.shuffle no longer exists in Python 3.11+, so shuffle in
        # place with the instance's own generator
        self.rng.shuffle(configs_acq)

        # sort according to acq value (stable, so the shuffled order
        # decides ties) and return the configurations best first
        configs_acq.sort(reverse=True, key=lambda x: x[0])

        return configs_acq