Esempio n. 1
0
    def __init__(
            self,
            scenario: Scenario,
            tae_runner: typing.Union[ExecuteTARun, typing.Callable] = None,
            runhistory: RunHistory = None,
            intensifier: Intensifier = None,
            acquisition_function: AbstractAcquisitionFunction = None,
            acquisition_function_optimizer: AcquisitionFunctionMaximizer = None,
            model: AbstractEPM = None,
            runhistory2epm: AbstractRunHistory2EPM = None,
            initial_design: InitialDesign = None,
            initial_configurations: typing.List[Configuration] = None,
            stats: Stats = None,
            restore_incumbent: Configuration = None,
            rng: typing.Union[np.random.RandomState, int] = None,
            smbo_class: SMBO = None,
            run_id: int = 1):
        """Constructor

        Parameters
        ----------
        scenario : ~smac.scenario.scenario.Scenario
            Scenario object
        tae_runner : ~smac.tae.execute_ta_run.ExecuteTARun or callable
            Callable or implementation of
            :class:`~smac.tae.execute_ta_run.ExecuteTARun`. In case a
            callable is passed it will be wrapped by
            :class:`~smac.tae.execute_func.ExecuteTAFuncDict`.
            If not set, it will be initialized with the
            :class:`~smac.tae.execute_ta_run_old.ExecuteTARunOld`.
        runhistory : RunHistory
            runhistory to store all algorithm runs
        intensifier : Intensifier
            intensification object to issue a racing to decide the current
            incumbent
        acquisition_function : ~smac.optimizer.acquisition.AbstractAcquisitionFunction
            Object that implements the :class:`~smac.optimizer.acquisition.AbstractAcquisitionFunction`.
            Will use :class:`~smac.optimizer.acquisition.EI` if not set.
        acquisition_function_optimizer : ~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
            Object that implements the :class:`~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer`.
            Will use :class:`smac.optimizer.ei_optimization.InterleavedLocalAndRandomSearch` if not set.
        model : AbstractEPM
            Model that implements train() and predict(). Will use a
            :class:`~smac.epm.rf_with_instances.RandomForestWithInstances` if not set.
        runhistory2epm : ~smac.runhistory.runhistory2epm.RunHistory2EMP
            Object that implements the AbstractRunHistory2EPM. If None,
            will use :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4Cost`
            if objective is cost or
            :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost`
            if objective is runtime.
        initial_design : InitialDesign
            initial sampling design
        initial_configurations : typing.List[Configuration]
            list of initial configurations for initial design --
            cannot be used together with initial_design
        stats : Stats
            optional stats object
        rng : np.random.RandomState
            Random number generator
        restore_incumbent : Configuration
            incumbent used if restoring to previous state
        smbo_class : ~smac.optimizer.smbo.SMBO
            Class implementing the SMBO interface which will be used to
            instantiate the optimizer class.
        run_id: int, (default: 1)
            Run ID will be used as subfolder for output_dir.
        """

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        aggregate_func = average_cost

        self.output_dir = create_output_directory(scenario, run_id)
        scenario.write()

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario)

        # initialize empty runhistory
        if runhistory is None:
            runhistory = RunHistory(aggregate_func=aggregate_func)
        # inject aggr_func if necessary
        if runhistory.aggregate_func is None:
            runhistory.aggregate_func = aggregate_func

        # initial random number generator
        num_run, rng = self._get_rng(rng=rng)

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir, stats=self.stats)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        if model is None:
            model = RandomForestWithInstances(
                types=types,
                bounds=bounds,
                instance_features=scenario.feature_array,
                seed=rng.randint(MAXINT),
                pca_components=scenario.PCA_DIM)
        # initial acquisition function
        if acquisition_function is None:
            if scenario.run_obj == "runtime":
                acquisition_function = LogEI(model=model)
            else:
                acquisition_function = EI(model=model)
        # inject model if necessary
        if acquisition_function.model is None:
            acquisition_function.model = model

        # initialize optimizer on acquisition function
        if acquisition_function_optimizer is None:
            acquisition_function_optimizer = InterleavedLocalAndRandomSearch(
                acquisition_function, scenario.cs,
                np.random.RandomState(seed=rng.randint(MAXINT)))
        elif not isinstance(
                acquisition_function_optimizer,
                AcquisitionFunctionMaximizer,
        ):
            raise ValueError(
                "Argument 'acquisition_function_optimizer' must be of type"
                "'AcquisitionFunctionMaximizer', but is '%s'" %
                type(acquisition_function_optimizer))

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        if tae_runner is None:
            tae_runner = ExecuteTARunOld(
                ta=scenario.ta,
                stats=self.stats,
                run_obj=scenario.run_obj,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Second case, the tae_runner is a function to be optimized
        elif callable(tae_runner):
            tae_runner = ExecuteTAFuncDict(
                ta=tae_runner,
                stats=self.stats,
                run_obj=scenario.run_obj,
                memory_limit=scenario.memory_limit,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Third case, if it is an ExecuteTaRun we can simply use the
        # instance. Otherwise, the next check raises an exception
        elif not isinstance(tae_runner, ExecuteTARun):
            raise TypeError("Argument 'tae_runner' is %s, but must be "
                            "either a callable or an instance of "
                            "ExecuteTaRun. Passing 'None' will result in the "
                            "creation of target algorithm runner based on the "
                            "call string in the scenario file." %
                            type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # inject stats if necessary
        if tae_runner.stats is None:
            tae_runner.stats = self.stats
        # inject runhistory if necessary
        if tae_runner.runhistory is None:
            tae_runner.runhistory = runhistory
        # inject cost_for_crash
        if tae_runner.crash_cost != scenario.cost_for_crash:
            tae_runner.crash_cost = scenario.cost_for_crash

        # initialize intensification
        if intensifier is None:
            intensifier = Intensifier(tae_runner=tae_runner,
                                      stats=self.stats,
                                      traj_logger=traj_logger,
                                      rng=rng,
                                      instances=scenario.train_insts,
                                      cutoff=scenario.cutoff,
                                      deterministic=scenario.deterministic,
                                      run_obj_time=scenario.run_obj == "runtime",
                                      always_race_against=scenario.cs.get_default_configuration() \
                                        if scenario.always_race_default else None,
                                      instance_specifics=scenario.instance_specific,
                                      minR=scenario.minR,
                                      maxR=scenario.maxR)
        # inject deps if necessary
        if intensifier.tae_runner is None:
            intensifier.tae_runner = tae_runner
        if intensifier.stats is None:
            intensifier.stats = self.stats
        if intensifier.traj_logger is None:
            intensifier.traj_logger = traj_logger

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both"
            )

        if initial_configurations is not None:
            initial_design = MultiConfigInitialDesign(
                tae_runner=tae_runner,
                scenario=scenario,
                stats=self.stats,
                traj_logger=traj_logger,
                runhistory=runhistory,
                rng=rng,
                configs=initial_configurations,
                intensifier=intensifier,
                aggregate_func=aggregate_func)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                initial_design = DefaultConfiguration(tae_runner=tae_runner,
                                                      scenario=scenario,
                                                      stats=self.stats,
                                                      traj_logger=traj_logger,
                                                      rng=rng)
            elif scenario.initial_incumbent == "RANDOM":
                initial_design = RandomConfiguration(tae_runner=tae_runner,
                                                     scenario=scenario,
                                                     stats=self.stats,
                                                     traj_logger=traj_logger,
                                                     rng=rng)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        # inject deps if necessary
        if initial_design.tae_runner is None:
            initial_design.tae_runner = tae_runner
        if initial_design.scenario is None:
            initial_design.scenario = scenario
        if initial_design.stats is None:
            initial_design.stats = self.stats
        if initial_design.traj_logger is None:
            initial_design.traj_logger = traj_logger

        # initial conversion of runhistory into EPM data
        if runhistory2epm is None:

            num_params = len(scenario.cs.get_hyperparameters())
            if scenario.run_obj == "runtime":

                # if we log the performance data,
                # the RFRImputator will already get
                # log transform data from the runhistory
                cutoff = np.log10(scenario.cutoff)
                threshold = np.log10(scenario.cutoff * scenario.par_factor)

                imputor = RFRImputator(rng=rng,
                                       cutoff=cutoff,
                                       threshold=threshold,
                                       model=model,
                                       change_threshold=0.01,
                                       max_iter=2)

                runhistory2epm = RunHistory2EPM4LogCost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=True,
                    impute_state=[
                        StatusType.CAPPED,
                    ],
                    imputor=imputor)

            elif scenario.run_obj == 'quality':
                runhistory2epm = RunHistory2EPM4Cost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[StatusType.SUCCESS, StatusType.CRASHED],
                    impute_censored_data=False,
                    impute_state=None)

            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % self.scenario.run_obj)

        # inject scenario if necessary:
        if runhistory2epm.scenario is None:
            runhistory2epm.scenario = scenario

        smbo_args = {
            'scenario': scenario,
            'stats': self.stats,
            'initial_design': initial_design,
            'runhistory': runhistory,
            'runhistory2epm': runhistory2epm,
            'intensifier': intensifier,
            'aggregate_func': aggregate_func,
            'num_run': num_run,
            'model': model,
            'acq_optimizer': acquisition_function_optimizer,
            'acquisition_func': acquisition_function,
            'rng': rng,
            'restore_incumbent': restore_incumbent
        }
        if smbo_class is None:
            self.solver = SMBO(**smbo_args)
        else:
            self.solver = smbo_class(**smbo_args)
Esempio n. 2
0
    def __init__(
            self,
            scenario: Scenario,
            # TODO: once we drop python3.4 add type hint
            # typing.Union[ExecuteTARun, callable]
            tae_runner=None,
            runhistory: RunHistory = None,
            intensifier: Intensifier = None,
            acquisition_function: AbstractAcquisitionFunction = None,
            model: AbstractEPM = None,
            runhistory2epm: AbstractRunHistory2EPM = None,
            initial_design: InitialDesign = None,
            initial_configurations: typing.List[Configuration] = None,
            stats: Stats = None,
            rng: np.random.RandomState = None,
            run_id: int = 1):
        """Constructor"""
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        aggregate_func = average_cost
        self.runhistory = None
        self.trajectory = None

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario)

        self.output_dir = create_output_directory(scenario, run_id)
        scenario.write()

        # initialize empty runhistory
        if runhistory is None:
            runhistory = RunHistory(aggregate_func=aggregate_func)
        # inject aggr_func if necessary
        if runhistory.aggregate_func is None:
            runhistory.aggregate_func = aggregate_func

        # initial random number generator
        num_run, rng = self._get_rng(rng=rng)

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir, stats=self.stats)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        if model is None:
            model = RandomForestWithInstances(
                types=types,
                bounds=bounds,
                instance_features=scenario.feature_array,
                seed=rng.randint(MAXINT),
                pca_components=scenario.PCA_DIM,
                num_trees=scenario.rf_num_trees,
                do_bootstrapping=scenario.rf_do_bootstrapping,
                ratio_features=scenario.rf_ratio_features,
                min_samples_split=scenario.rf_min_samples_split,
                min_samples_leaf=scenario.rf_min_samples_leaf,
                max_depth=scenario.rf_max_depth)
        # initial acquisition function
        if acquisition_function is None:
            if scenario.run_obj == "runtime":
                acquisition_function = LogEI(model=model)
            else:
                acquisition_function = EI(model=model)
        # inject model if necessary
        if acquisition_function.model is None:
            acquisition_function.model = model

        # initialize optimizer on acquisition function
        local_search = LocalSearch(
            acquisition_function,
            scenario.cs,
            max_steps=scenario.sls_max_steps,
            n_steps_plateau_walk=scenario.sls_n_steps_plateau_walk)

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        if tae_runner is None:
            tae_runner = ExecuteTARunOld(
                ta=scenario.ta,
                stats=self.stats,
                run_obj=scenario.run_obj,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Second case, the tae_runner is a function to be optimized
        elif callable(tae_runner):
            tae_runner = ExecuteTAFuncDict(
                ta=tae_runner,
                stats=self.stats,
                run_obj=scenario.run_obj,
                memory_limit=scenario.memory_limit,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Third case, if it is an ExecuteTaRun we can simply use the
        # instance. Otherwise, the next check raises an exception
        elif not isinstance(tae_runner, ExecuteTARun):
            raise TypeError("Argument 'tae_runner' is %s, but must be "
                            "either a callable or an instance of "
                            "ExecuteTaRun. Passing 'None' will result in the "
                            "creation of target algorithm runner based on the "
                            "call string in the scenario file." %
                            type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # inject stats if necessary
        if tae_runner.stats is None:
            tae_runner.stats = self.stats
        # inject runhistory if necessary
        if tae_runner.runhistory is None:
            tae_runner.runhistory = runhistory
        # inject cost_for_crash
        if tae_runner.crash_cost != scenario.cost_for_crash:
            tae_runner.crash_cost = scenario.cost_for_crash

        # initialize intensification
        if intensifier is None:
            intensifier = Intensifier(
                tae_runner=tae_runner,
                stats=self.stats,
                traj_logger=traj_logger,
                rng=rng,
                instances=scenario.train_insts,
                cutoff=scenario.cutoff,
                deterministic=scenario.deterministic,
                run_obj_time=scenario.run_obj == "runtime",
                always_race_against=scenario.cs.get_default_configuration()
                if scenario.always_race_default else None,
                instance_specifics=scenario.instance_specific,
                minR=scenario.minR,
                maxR=scenario.maxR,
                adaptive_capping_slackfactor=scenario.
                intens_adaptive_capping_slackfactor,
                min_chall=scenario.intens_min_chall)
        # inject deps if necessary
        if intensifier.tae_runner is None:
            intensifier.tae_runner = tae_runner
        if intensifier.stats is None:
            intensifier.stats = self.stats
        if intensifier.traj_logger is None:
            intensifier.traj_logger = traj_logger

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both"
            )

        if initial_configurations is not None:
            initial_design = MultiConfigInitialDesign(
                tae_runner=tae_runner,
                scenario=scenario,
                stats=self.stats,
                traj_logger=traj_logger,
                runhistory=runhistory,
                rng=rng,
                configs=initial_configurations,
                intensifier=intensifier,
                aggregate_func=aggregate_func)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                initial_design = DefaultConfiguration(tae_runner=tae_runner,
                                                      scenario=scenario,
                                                      stats=self.stats,
                                                      traj_logger=traj_logger,
                                                      rng=rng)
            elif scenario.initial_incumbent == "RANDOM":
                initial_design = RandomConfiguration(tae_runner=tae_runner,
                                                     scenario=scenario,
                                                     stats=self.stats,
                                                     traj_logger=traj_logger,
                                                     rng=rng)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        # inject deps if necessary
        if initial_design.tae_runner is None:
            initial_design.tae_runner = tae_runner
        if initial_design.scenario is None:
            initial_design.scenario = scenario
        if initial_design.stats is None:
            initial_design.stats = self.stats
        if initial_design.traj_logger is None:
            initial_design.traj_logger = traj_logger

        # initial conversion of runhistory into EPM data
        if runhistory2epm is None:

            num_params = len(scenario.cs.get_hyperparameters())
            if scenario.run_obj == "runtime":

                # if we log the performance data,
                # the RFRImputator will already get
                # log transform data from the runhistory
                cutoff = np.log(scenario.cutoff)
                threshold = np.log(scenario.cutoff * scenario.par_factor)

                imputor = RFRImputator(rng=rng,
                                       cutoff=cutoff,
                                       threshold=threshold,
                                       model=model,
                                       change_threshold=0.01,
                                       max_iter=2)

                runhistory2epm = RunHistory2EPM4LogCost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=True,
                    impute_state=[
                        StatusType.CAPPED,
                    ],
                    imputor=imputor)

            elif scenario.run_obj == 'quality':
                runhistory2epm = RunHistory2EPM4Cost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=False,
                    impute_state=None)

            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % self.scenario.run_obj)

        # inject scenario if necessary:
        if runhistory2epm.scenario is None:
            runhistory2epm.scenario = scenario

        self.solver = EPILS_Solver(scenario=scenario,
                                   stats=self.stats,
                                   initial_design=initial_design,
                                   runhistory=runhistory,
                                   runhistory2epm=runhistory2epm,
                                   intensifier=intensifier,
                                   aggregate_func=aggregate_func,
                                   num_run=num_run,
                                   model=model,
                                   acq_optimizer=local_search,
                                   acquisition_func=acquisition_function,
                                   rng=rng)