def __init__(self,
                 target_names: List[str],
                 configspace: ConfigurationSpace,
                 bounds: List[Tuple[float, float]],
                 types: np.ndarray,
                 seed: int,
                 rf_kwargs: Optional[Dict[str, Any]] = None,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        target_names : list
            List of str, each entry is the name of one target dimension. Length
            of the list will be ``n_objectives``.

        bounds : np.ndarray
            See :class:`~dsmac.epm.rf_with_instances.RandomForestWithInstances` documentation.

        types : np.ndarray
            See :class:`~dsmac.epm.rf_with_instances.RandomForestWithInstances` documentation.

        kwargs
            See :class:`~dsmac.epm.rf_with_instances.RandomForestWithInstances` documentation.
        """
        super().__init__(configspace=configspace,
                         bounds=bounds,
                         types=types,
                         seed=seed,
                         **kwargs)
        if rf_kwargs is None:
            rf_kwargs = {}

        self.target_names = target_names
        self.num_targets = len(self.target_names)
        print(seed, rf_kwargs)
        self.estimators = [
            RandomForestWithInstances(configspace, types, bounds, **rf_kwargs)
            for _ in range(self.num_targets)
        ]
    def __init__(
        self,
        scenario: Scenario,
        tae_runner: Optional[Union[Type[ExecuteTARun], Callable]] = None,
        tae_runner_kwargs: Optional[dict] = None,
        runhistory: Optional[Union[Type[RunHistory], RunHistory]] = None,
        runhistory_kwargs: Optional[dict] = None,
        intensifier: Optional[Type[Intensifier]] = None,
        intensifier_kwargs: Optional[dict] = None,
        acquisition_function: Optional[
            Type[AbstractAcquisitionFunction]] = None,
        acquisition_function_kwargs: Optional[dict] = None,
        integrate_acquisition_function: bool = False,
        acquisition_function_optimizer: Optional[
            Type[AcquisitionFunctionMaximizer]] = None,
        acquisition_function_optimizer_kwargs: Optional[dict] = None,
        model: Optional[Type[AbstractEPM]] = None,
        model_kwargs: Optional[dict] = None,
        runhistory2epm: Optional[Type[AbstractRunHistory2EPM]] = None,
        runhistory2epm_kwargs: Optional[dict] = None,
        initial_design: Optional[Type[InitialDesign]] = None,
        initial_design_kwargs: Optional[dict] = None,
        initial_configurations: Optional[List[Configuration]] = None,
        stats: Optional[Stats] = None,
        restore_incumbent: Optional[Configuration] = None,
        rng: Optional[Union[np.random.RandomState, int]] = None,
        smbo_class: Optional[SMBO] = None,
        run_id: Optional[int] = None,
        random_configuration_chooser: Optional[
            Type[RandomConfigurationChooser]] = None,
        random_configuration_chooser_kwargs: Optional[dict] = None,
    ):
        """
        Constructor

        Parameters
        ----------
        scenario : ~dsmac.scenario.scenario.Scenario
            Scenario object
        tae_runner : ~dsmac.tae.execute_ta_run.ExecuteTARun or callable
            Callable or implementation of
            :class:`~dsmac.tae.execute_ta_run.ExecuteTARun`. In case a
            callable is passed it will be wrapped by
            :class:`~dsmac.tae.execute_func.ExecuteTAFuncDict`.
            If not set, it will be initialized with the
            :class:`~dsmac.tae.execute_ta_run_old.ExecuteTARunOld`.
        tae_runner_kwargs: Optional[dict]
            arguments passed to constructor of '~tae_runner'
        runhistory : RunHistory
            runhistory to store all algorithm runs
        runhistory_kwargs : Optional[dict]
            arguments passed to constructor of runhistory.
            We strongly advise against changing the aggregation function,
            since it will break some code assumptions
        intensifier : Intensifier
            intensification object to issue a racing to decide the current
            incumbent
        intensifier_kwargs: Optional[dict]
            arguments passed to the constructor of '~intensifier'
        acquisition_function : ~dsmac.optimizer.acquisition.AbstractAcquisitionFunction
            Class or object that implements the :class:`~dsmac.optimizer.acquisition.AbstractAcquisitionFunction`.
            Will use :class:`~dsmac.optimizer.acquisition.EI` or :class:`~dsmac.optimizer.acquisition.LogEI` if not set.
            `~acquisition_function_kwargs` is passed to the class constructor.
        acquisition_function_kwargs : Optional[dict]
            dictionary to pass specific arguments to ~acquisition_function
        integrate_acquisition_function : bool, default=False
            Whether to integrate the acquisition function. Works only with models which can sample their
            hyperparameters (i.e. GaussianProcessMCMC).
        acquisition_function_optimizer : ~dsmac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
            Object that implements the :class:`~dsmac.optimizer.ei_optimization.AcquisitionFunctionMaximizer`.
            Will use :class:`dsmac.optimizer.ei_optimization.InterleavedLocalAndRandomSearch` if not set.
        acquisition_function_optimizer_kwargs: Optional[dict]
            Arguments passed to constructor of '~acquisition_function_optimizer'
        model : AbstractEPM
            Model that implements train() and predict(). Will use a
            :class:`~dsmac.epm.rf_with_instances.RandomForestWithInstances` if not set.
        model_kwargs : Optional[dict]
            Arguments passed to constructor of '~model'
        runhistory2epm : ~dsmac.runhistory.runhistory2epm.RunHistory2EMP
            Object that implements the AbstractRunHistory2EPM. If None,
            will use :class:`~dsmac.runhistory.runhistory2epm.RunHistory2EPM4Cost`
            if objective is cost or
            :class:`~dsmac.runhistory.runhistory2epm.RunHistory2EPM4LogCost`
            if objective is runtime.
        runhistory2epm_kwargs: Optional[dict]
            Arguments passed to the constructor of '~runhistory2epm'
        initial_design : InitialDesign
            initial sampling design
        initial_design_kwargs: Optional[dict]
            arguments passed to constructor of `~initial_design'
        initial_configurations : List[Configuration]
            list of initial configurations for initial design --
            cannot be used together with initial_design
        stats : Stats
            optional stats object
        rng : np.random.RandomState
            Random number generator
        restore_incumbent : Configuration
            incumbent used if restoring to previous state
        smbo_class : ~dsmac.optimizer.smbo.SMBO
            Class implementing the SMBO interface which will be used to
            instantiate the optimizer class.
        run_id : int (optional)
            Run ID will be used as subfolder for output_dir. If no ``run_id`` is given, a random ``run_id`` will be
            chosen.
        random_configuration_chooser : ~dsmac.optimizer.random_configuration_chooser.RandomConfigurationChooser
            How often to choose a random configuration during the intensification procedure.
        random_configuration_chooser_kwargs : Optional[dict]
            arguments of constructor for '~random_configuration_chooser'

        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        aggregate_func = average_cost

        self.scenario = scenario
        self.output_dir = ""
        if not restore_incumbent:
            # restore_incumbent is used by the CLI interface which provides a method for restoring a SMAC run given an
            # output directory. This is the default path.
            # initial random number generator
            # run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            # run_id=datetime.now().strftime("%Y%m%d%H%M%S%f")
            run_id = uuid1()
            # self.output_dir = create_output_directory(scenario, run_id)   # fixme run_id
            self.output_dir = scenario.output_dir  # create_output_directory(scenario, run_id)   # fixme run_id

        elif scenario.output_dir is not None:
            run_id, rng = get_rng(rng=rng, run_id=run_id, logger=self.logger)
            # output-directory is created in CLI when restoring from a
            # folder. calling the function again in the facade results in two
            # folders being created: run_X and run_X.OLD. if we are
            # restoring, the output-folder exists already and we omit creating it,
            # but set the self-output_dir to the dir.
            # necessary because we want to write traj to new output-dir in CLI.
            self.output_dir = scenario.output_dir_for_this_run

        if (scenario.deterministic is True
                and getattr(scenario, 'tuner_timeout', None) is None
                and scenario.run_obj == 'quality'):
            self.logger.info(
                'Optimizing a deterministic scenario for quality without a tuner timeout - will make '
                'SMAC deterministic and only evaluate one configuration per iteration!'
            )
            scenario.intensification_percentage = 1e-10
            scenario.min_chall = 1
        scenario.write()

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario, file_system=scenario.file_system)

        if self.scenario.run_obj == "runtime" and not self.scenario.transform_y == "LOG":
            self.logger.warning(
                "Runtime as objective automatically activates log(y) transformation"
            )
            self.scenario.transform_y = "LOG"

        # initialize empty runhistory
        runhistory_def_kwargs = {'aggregate_func': aggregate_func}
        if runhistory_kwargs is not None:
            runhistory_def_kwargs.update(runhistory_kwargs)
        if runhistory is None:
            runhistory = RunHistory(**runhistory_def_kwargs,
                                    file_system=scenario.file_system,
                                    db_type=scenario.db_type,
                                    db_args=scenario.db_args,
                                    db_kwargs=scenario.db_kwargs)
        elif inspect.isclass(runhistory):
            runhistory = runhistory(**runhistory_def_kwargs)
        else:
            if runhistory.aggregate_func is None:
                runhistory.aggregate_func = aggregate_func

        rand_conf_chooser_kwargs = {'rng': rng}
        if random_configuration_chooser_kwargs is not None:
            rand_conf_chooser_kwargs.update(
                random_configuration_chooser_kwargs)
        if random_configuration_chooser is None:
            if 'prob' not in rand_conf_chooser_kwargs:
                rand_conf_chooser_kwargs['prob'] = scenario.rand_prob
            random_configuration_chooser = ChooserProb(
                **rand_conf_chooser_kwargs)
        elif inspect.isclass(random_configuration_chooser):
            random_configuration_chooser = random_configuration_chooser(
                **rand_conf_chooser_kwargs)
        elif not isinstance(random_configuration_chooser,
                            RandomConfigurationChooser):
            raise ValueError(
                "random_configuration_chooser has to be"
                " a class or object of RandomConfigurationChooser")

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir,
                                 stats=self.stats,
                                 file_system=scenario.file_system)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        model_def_kwargs = {
            'types': types,
            'bounds': bounds,
            'instance_features': scenario.feature_array,
            'seed': rng.randint(MAXINT),
            'pca_components': scenario.PCA_DIM,
        }
        if model_kwargs is not None:
            model_def_kwargs.update(model_kwargs)
        if model is None:
            for key, value in {
                    'log_y': scenario.transform_y in ["LOG", "LOGS"],
                    'num_trees': scenario.rf_num_trees,
                    'do_bootstrapping': scenario.rf_do_bootstrapping,
                    'ratio_features': scenario.rf_ratio_features,
                    'min_samples_split': scenario.rf_min_samples_split,
                    'min_samples_leaf': scenario.rf_min_samples_leaf,
                    'max_depth': scenario.rf_max_depth,
            }.items():
                if key not in model_def_kwargs:
                    model_def_kwargs[key] = value
            model_def_kwargs['configspace'] = self.scenario.cs
            model = RandomForestWithInstances(**model_def_kwargs)
        elif inspect.isclass(model):
            model_def_kwargs['configspace'] = self.scenario.cs
            model = model(**model_def_kwargs)
        else:
            raise TypeError("Model not recognized: %s" % (type(model)))

        # initial acquisition function
        acq_def_kwargs = {'model': model}
        if acquisition_function_kwargs is not None:
            acq_def_kwargs.update(acquisition_function_kwargs)
        if acquisition_function is None:
            if scenario.transform_y in ["LOG", "LOGS"]:
                acquisition_function = LogEI(**acq_def_kwargs)
            else:
                acquisition_function = EI(**acq_def_kwargs)
        elif inspect.isclass(acquisition_function):
            acquisition_function = acquisition_function(**acq_def_kwargs)
        else:
            raise TypeError(
                "Argument acquisition_function must be None or an object implementing the "
                "AbstractAcquisitionFunction, not %s." %
                type(acquisition_function))
        if integrate_acquisition_function:
            acquisition_function = IntegratedAcquisitionFunction(
                acquisition_function=acquisition_function, **acq_def_kwargs)

        # initialize optimizer on acquisition function
        acq_func_opt_kwargs = {
            'acquisition_function': acquisition_function,
            'config_space': scenario.cs,
            'rng': rng,
        }
        if acquisition_function_optimizer_kwargs is not None:
            acq_func_opt_kwargs.update(acquisition_function_optimizer_kwargs)
        if acquisition_function_optimizer is None:
            for key, value in {
                    'max_steps': scenario.sls_max_steps,
                    'n_steps_plateau_walk': scenario.sls_n_steps_plateau_walk,
            }.items():
                if key not in acq_func_opt_kwargs:
                    acq_func_opt_kwargs[key] = value
            acquisition_function_optimizer = InterleavedLocalAndRandomSearch(
                **acq_func_opt_kwargs)
        elif inspect.isclass(acquisition_function_optimizer):
            acquisition_function_optimizer = acquisition_function_optimizer(
                **acq_func_opt_kwargs)
        else:
            raise TypeError(
                "Argument acquisition_function_optimizer must be None or an object implementing the "
                "AcquisitionFunctionMaximizer, but is '%s'" %
                type(acquisition_function_optimizer))

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        tae_def_kwargs = {
            'stats': self.stats,
            'run_obj': scenario.run_obj,
            'runhistory': runhistory,
            'par_factor': scenario.par_factor,
            'cost_for_crash': scenario.cost_for_crash,
            'abort_on_first_run_crash': scenario.abort_on_first_run_crash
        }
        if tae_runner_kwargs is not None:
            tae_def_kwargs.update(tae_runner_kwargs)
        if 'ta' not in tae_def_kwargs:
            tae_def_kwargs['ta'] = scenario.ta
        if tae_runner is None:
            tae_def_kwargs['ta'] = scenario.ta
            tae_runner = ExecuteTARunOld(**tae_def_kwargs)
        elif inspect.isclass(tae_runner):
            tae_runner = tae_runner(**tae_def_kwargs)
        elif callable(tae_runner):
            tae_def_kwargs['ta'] = tae_runner
            tae_runner = ExecuteTAFuncDict(**tae_def_kwargs)
        else:
            raise TypeError(
                "Argument 'tae_runner' is %s, but must be "
                "either None, a callable or an object implementing "
                "ExecuteTaRun. Passing 'None' will result in the "
                "creation of target algorithm runner based on the "
                "call string in the scenario file." % type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # initialize intensification
        intensifier_def_kwargs = {
            'tae_runner':
            tae_runner,
            'stats':
            self.stats,
            'traj_logger':
            traj_logger,
            'rng':
            rng,
            'instances':
            scenario.train_insts,
            'cutoff':
            scenario.cutoff,
            'deterministic':
            scenario.deterministic,
            'run_obj_time':
            scenario.run_obj == "runtime",
            'always_race_against':
            scenario.cs.get_default_configuration()
            if scenario.always_race_default else None,
            'use_ta_time_bound':
            scenario.use_ta_time,
            'instance_specifics':
            scenario.instance_specific,
            'minR':
            scenario.minR,
            'maxR':
            scenario.maxR,
            'adaptive_capping_slackfactor':
            scenario.intens_adaptive_capping_slackfactor,
            'min_chall':
            scenario.intens_min_chall,
        }
        if hasattr(scenario,
                   'filter_callback') and scenario.filter_callback is not None:
            print('update callback')
            intensifier_def_kwargs.update(
                {'filter_callback': scenario.filter_callback})
        if intensifier_kwargs is not None:
            intensifier_def_kwargs.update(intensifier_kwargs)
        if intensifier is None:
            intensifier = Intensifier(**intensifier_def_kwargs)
        elif inspect.isclass(intensifier):
            intensifier = intensifier(**intensifier_def_kwargs)
        else:
            raise TypeError(
                "Argument intensifier must be None or an object implementing the Intensifier, but is '%s'"
                % type(intensifier))

        # initial design
        if initial_design is not None and initial_configurations is not None:
            initial_design.initial_configurations = initial_configurations
            initial_configurations = None
        init_design_def_kwargs = {
            'tae_runner': tae_runner,
            'scenario': scenario,
            'stats': self.stats,
            'traj_logger': traj_logger,
            'runhistory': runhistory,
            'rng': rng,
            'configs': initial_configurations,
            'intensifier': intensifier,
            'aggregate_func': aggregate_func,
            'n_configs_x_params': 0,
            'max_config_fracs': 0.0,
            'initial_configurations': initial_design.initial_configurations
        }
        if initial_design_kwargs is not None:
            init_design_def_kwargs.update(initial_design_kwargs)
        if initial_configurations is not None:
            initial_design = InitialDesign(**init_design_def_kwargs)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                init_design_def_kwargs['max_config_fracs'] = 0.0
                initial_design = DefaultConfiguration(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "RANDOM":
                init_design_def_kwargs['max_config_fracs'] = 0.0
                initial_design = RandomConfigurations(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "LHD":
                initial_design = LHDesign(**init_design_def_kwargs)
            elif scenario.initial_incumbent == "FACTORIAL":
                initial_design = FactorialInitialDesign(
                    **init_design_def_kwargs)
            elif scenario.initial_incumbent == "SOBOL":
                initial_design = SobolDesign(**init_design_def_kwargs)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        elif inspect.isclass(initial_design):
            initial_design = initial_design(**init_design_def_kwargs)
        else:
            raise TypeError(
                "Argument initial_design must be None or an object implementing the InitialDesign, but is '%s'"
                % type(initial_design))

        # if we log the performance data,
        # the RFRImputator will already get
        # log transform data from the runhistory
        if scenario.transform_y in ["LOG", "LOGS"]:
            cutoff = np.log(np.nanmin([np.inf, np.float_(scenario.cutoff)]))
            threshold = cutoff + np.log(scenario.par_factor)
        else:
            cutoff = np.nanmin([np.inf, np.float_(scenario.cutoff)])
            threshold = cutoff * scenario.par_factor
        num_params = len(scenario.cs.get_hyperparameters())
        imputor = RFRImputator(rng=rng,
                               cutoff=cutoff,
                               threshold=threshold,
                               model=model,
                               change_threshold=0.01,
                               max_iter=2)

        r2e_def_kwargs = {
            'scenario': scenario,
            'num_params': num_params,
            'success_states': [
                StatusType.SUCCESS,
            ],
            'impute_censored_data': True,
            'impute_state': [
                StatusType.CAPPED,
            ],
            'imputor': imputor,
            'scale_perc': 5
        }
        if scenario.run_obj == 'quality':
            r2e_def_kwargs.update({
                'success_states': [StatusType.SUCCESS, StatusType.CRASHED],
                'impute_censored_data':
                False,
                'impute_state':
                None,
            })
        if runhistory2epm_kwargs is not None:
            r2e_def_kwargs.update(runhistory2epm_kwargs)
        if runhistory2epm is None:
            if scenario.run_obj == 'runtime':
                runhistory2epm = RunHistory2EPM4LogCost(**r2e_def_kwargs)
            elif scenario.run_obj == 'quality':
                if scenario.transform_y == "NONE":
                    runhistory2epm = RunHistory2EPM4Cost(**r2e_def_kwargs)
                elif scenario.transform_y == "LOG":
                    runhistory2epm = RunHistory2EPM4LogCost(**r2e_def_kwargs)
                elif scenario.transform_y == "LOGS":
                    runhistory2epm = RunHistory2EPM4LogScaledCost(
                        **r2e_def_kwargs)
                elif scenario.transform_y == "INVS":
                    runhistory2epm = RunHistory2EPM4InvScaledCost(
                        **r2e_def_kwargs)
            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % self.scenario.run_obj)
        elif inspect.isclass(runhistory2epm):
            runhistory2epm = runhistory2epm(**r2e_def_kwargs)
        else:
            raise TypeError(
                "Argument runhistory2epm must be None or an object implementing the RunHistory2EPM, but is '%s'"
                % type(runhistory2epm))

        smbo_args = {
            'scenario': scenario,
            'stats': self.stats,
            'initial_design': initial_design,
            'runhistory': runhistory,
            'runhistory2epm': runhistory2epm,
            'intensifier': intensifier,
            'aggregate_func': aggregate_func,
            'num_run': run_id,
            'model': model,
            'acq_optimizer': acquisition_function_optimizer,
            'acquisition_func': acquisition_function,
            'rng': rng,
            'restore_incumbent': restore_incumbent,
            'random_configuration_chooser': random_configuration_chooser
        }

        if smbo_class is None:
            self.solver = SMBO(**smbo_args)
        else:
            self.solver = smbo_class(**smbo_args)
Beispiel #3
0
class Validator(object):
    """
    Validator for the output of SMAC-scenarios.
    Evaluates specified configurations on specified instances.
    """
    def __init__(self,
                 scenario: Scenario,
                 trajectory: list,
                 rng: Union[np.random.RandomState, int] = None):
        """
        Construct Validator for given scenario and trajectory.

        Parameters
        ----------
        scenario: Scenario
            scenario object for cutoff, instances, features and specifics
        trajectory: trajectory-list
            trajectory to take incumbent(s) from
        rng: np.random.RandomState or int
            Random number generator or seed
        """
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        self.traj = trajectory
        self.scen = scenario
        self.epm = None

        if isinstance(rng, np.random.RandomState):
            self.rng = rng
        elif isinstance(rng, int):
            self.rng = np.random.RandomState(seed=rng)
        else:
            self.logger.debug('no seed given, using default seed of 1')
            num_run = 1
            self.rng = np.random.RandomState(seed=num_run)

    def _save_results(self, rh: RunHistory, output_fn, backup_fn=None):
        """ Helper to save results to file

        Parameters
        ----------
        rh: RunHistory
            runhistory to save
        output_fn: str
            if ends on '.json': filename to save history to
            else: directory to save runhistory to (filename is backup_fn)
        backup_fn: str
            if output_fn does not end on '.json', treat output_fn as dir and
            append backup_fn as filename (if output_fn ends on '.json', this
            argument is ignored)
        """
        if output_fn == "":
            self.logger.info(
                "No output specified, validated runhistory not saved.")
            return
        # Check if a folder or a file is specified as output
        if not output_fn.endswith('.json'):
            output_dir = output_fn
            output_fn = os.path.join(output_dir, backup_fn)
            self.logger.debug("Output is \"%s\", changing to \"%s\"!",
                              output_dir, output_fn)
        base = os.path.split(output_fn)[0]
        if not base == "" and not os.path.exists(base):
            self.logger.debug("Folder (\"%s\") doesn't exist, creating.", base)
            os.makedirs(base)
        rh.save_json(output_fn)
        self.logger.info("Saving validation-results in %s", output_fn)

    def validate(
        self,
        config_mode: Union[str, typing.List[Configuration]] = 'def',
        instance_mode: Union[str, typing.List[str]] = 'test',
        repetitions: int = 1,
        n_jobs: int = 1,
        backend: str = 'threading',
        runhistory: RunHistory = None,
        tae: ExecuteTARun = None,
        output_fn: str = "",
    ) -> RunHistory:
        """
        Validate configs on instances and save result in runhistory.
        If a runhistory is provided as input it is important that you run it on the same/comparable hardware.

        side effect: if output is specified, saves runhistory to specified
        output directory.

        Parameters
        ----------
        config_mode: str or list<Configuration>
            string or directly a list of Configuration.
            string from [def, inc, def+inc, wallclock_time, cpu_time, all].
            time evaluates at cpu- or wallclock-timesteps of:
            [max_time/2^0, max_time/2^1, max_time/2^3, ..., default]
            with max_time being the highest recorded time
        instance_mode: str or list<str>
            what instances to use for validation, either from
            [train, test, train+test] or directly a list of instances
        repetitions: int
            number of repetitions in nondeterministic algorithms
        n_jobs: int
            number of parallel processes used by joblib
        backend: str
            what backend joblib should use for parallel runs
        runhistory: RunHistory
            optional, RunHistory-object to reuse runs
        tae: ExecuteTARun
            tae to be used. if None, will initialize ExecuteTARunOld
        output_fn: str
            path to runhistory to be saved. if the suffix is not '.json', will
            be interpreted as directory and filename will be
            'validated_runhistory.json'

        Returns
        -------
        runhistory: RunHistory
            runhistory with validated runs
        """
        self.logger.debug(
            "Validating configs '%s' on instances '%s', repeating %d times"
            " with %d parallel runs on backend '%s'.", config_mode,
            instance_mode, repetitions, n_jobs, backend)

        # Get all runs to be evaluated as list
        runs, validated_rh = self._get_runs(config_mode, instance_mode,
                                            repetitions, runhistory)

        # Create new Stats without limits
        inf_scen = Scenario({
            'run_obj': self.scen.run_obj,
            'cutoff_time': self.scen.cutoff,
            'output_dir': ""
        })
        inf_stats = Stats(inf_scen)
        inf_stats.start_timing()

        # Create TAE
        if not tae:
            tae = ExecuteTARunOld(ta=self.scen.ta,
                                  stats=inf_stats,
                                  run_obj=self.scen.run_obj,
                                  par_factor=self.scen.par_factor,
                                  cost_for_crash=self.scen.cost_for_crash)
        else:
            # Inject endless-stats
            tae.stats = inf_stats

        # Validate!
        run_results = self._validate_parallel(tae, runs, n_jobs, backend)

        # tae returns (status, cost, runtime, additional_info)
        # Add runs to RunHistory
        idx = 0
        for result in run_results:
            validated_rh.add(config=runs[idx].config,
                             cost=result[1],
                             time=result[2],
                             status=result[0],
                             instance_id=runs[idx].inst,
                             seed=runs[idx].seed,
                             additional_info=result[3])
            idx += 1

        if output_fn:
            self._save_results(validated_rh,
                               output_fn,
                               backup_fn="validated_runhistory.json")
        return validated_rh

    def _validate_parallel(self, tae: ExecuteTARun, runs: typing.List[_Run],
                           n_jobs: int, backend: str):
        """
        Validate runs with joblibs Parallel-interface

        Parameters
        ----------
        tae: ExecuteTARun
            tae to be used for validation
        runs: list<_Run>
            list with _Run-objects
            [_Run(config=CONFIG1,inst=INSTANCE1,seed=SEED1,inst_specs=INST_SPECIFICS1), ...]
        n_jobs: int
            number of cpus to use for validation (-1 to use all)
        backend: str
            what backend to use for parallelization

        Returns
        -------
        run_results: list<tuple(tae-returns)>
            results as returned by tae
        """
        # Runs with parallel
        run_results = Parallel(n_jobs=n_jobs, backend=backend)(
            delayed(_unbound_tae_starter)(tae,
                                          run.config,
                                          run.inst,
                                          self.scen.cutoff,
                                          run.seed,
                                          run.inst_specs,
                                          capped=False) for run in runs)
        return run_results

    def validate_epm(
        self,
        config_mode: Union[str, typing.List[Configuration]] = 'def',
        instance_mode: Union[str, typing.List[str]] = 'test',
        repetitions: int = 1,
        runhistory: RunHistory = None,
        output_fn="",
        reuse_epm=True,
    ) -> RunHistory:
        """
        Use EPM to predict costs/runtimes for unknown config/inst-pairs.

        side effect: if output is specified, saves runhistory to specified
        output directory.

        Parameters
        ----------
        output_fn: str
            path to runhistory to be saved. if the suffix is not '.json', will
            be interpreted as directory and filename will be
            'validated_runhistory_EPM.json'
        config_mode: str or list<Configuration>
            string or directly a list of Configuration, string from [def, inc, def+inc, wallclock_time, cpu_time, all].
            time evaluates at cpu- or wallclock-timesteps of:
            [max_time/2^0, max_time/2^1, max_time/2^3, ..., default] with max_time being the highest recorded time
        instance_mode: str or list<str>
            what instances to use for validation, either from
            [train, test, train+test] or directly a list of instances
        repetitions: int
            number of repetitions in nondeterministic algorithms
        runhistory: RunHistory
            optional, RunHistory-object to reuse runs
        reuse_epm: bool
            if true (and if `self.epm`), reuse epm to validate runs

        Returns
        -------
        runhistory: RunHistory
            runhistory with predicted runs
        """
        if not isinstance(runhistory, RunHistory) and (self.epm is None
                                                       or reuse_epm is False):
            raise ValueError(
                "No runhistory specified for validating with EPM!")
        elif reuse_epm is False or self.epm is None:
            # Create RandomForest
            types, bounds = get_types(self.scen.cs, self.scen.feature_array)
            self.epm = RandomForestWithInstances(
                configspace=self.scen.cs,
                types=types,
                bounds=bounds,
                instance_features=self.scen.feature_array,
                seed=self.rng.randint(MAXINT),
                ratio_features=1.0,
            )
            # Use imputor if objective is runtime
            imputor = None
            impute_state = None
            impute_censored_data = False
            if self.scen.run_obj == 'runtime':
                threshold = self.scen.cutoff * self.scen.par_factor
                imputor = RFRImputator(rng=self.rng,
                                       cutoff=self.scen.cutoff,
                                       threshold=threshold,
                                       model=self.epm)
                impute_censored_data = True
                impute_state = [StatusType.CAPPED]
            # Transform training data (from given rh)
            rh2epm = RunHistory2EPM4Cost(
                num_params=len(self.scen.cs.get_hyperparameters()),
                scenario=self.scen,
                rng=self.rng,
                impute_censored_data=impute_censored_data,
                imputor=imputor,
                impute_state=impute_state)
            X, y = rh2epm.transform(runhistory)
            self.logger.debug("Training model with data of shape X: %s, y:%s",
                              str(X.shape), str(y.shape))
            # Train random forest
            self.epm.train(X, y)

        # Predict desired runs
        runs, rh_epm = self._get_runs(config_mode, instance_mode, repetitions,
                                      runhistory)

        feature_array_size = len(self.scen.cs.get_hyperparameters())
        if self.scen.feature_array is not None:
            feature_array_size += self.scen.feature_array.shape[1]

        X_pred = np.empty((len(runs), feature_array_size))
        for idx, run in enumerate(runs):
            if self.scen.feature_array is not None and run.inst is not None:
                X_pred[idx] = np.hstack([
                    convert_configurations_to_array([run.config])[0],
                    self.scen.feature_dict[run.inst]
                ])
            else:
                X_pred[idx] = convert_configurations_to_array([run.config])[0]
        self.logger.debug("Predicting desired %d runs, data has shape %s",
                          len(runs), str(X_pred.shape))

        y_pred = self.epm.predict(X_pred)

        # Add runs to runhistory
        for run, pred in zip(runs, y_pred[0]):
            rh_epm.add(
                config=run.config,
                cost=float(pred),
                time=float(pred),
                status=StatusType.SUCCESS,
                instance_id=run.inst,
                seed=-1,
                additional_info={"additional_info": "ESTIMATED USING EPM!"})

        if output_fn:
            self._save_results(rh_epm,
                               output_fn,
                               backup_fn="validated_runhistory_EPM.json")
        return rh_epm

    def _get_runs(
        self,
        configs: Union[str, typing.List[Configuration]],
        insts: Union[str, typing.List[str]],
        repetitions: int = 1,
        runhistory: RunHistory = None,
    ) -> typing.Tuple[typing.List[_Run], RunHistory]:
        """
        Generate list of SMAC-TAE runs to be executed. This means
        combinations of configs with all instances on a certain number of seeds.

        side effect: Adds runs that don't need to be reevaluated to self.rh!

        Parameters
        ----------
        configs: str or list<Configuration>
            string or directly a list of Configuration
            str from [def, inc, def+inc, wallclock_time, cpu_time, all]
                time evaluates at cpu- or wallclock-timesteps of:
                [max_time/2^0, max_time/2^1, max_time/2^3, ..., default]
                with max_time being the highest recorded time
        insts: str or list<str>
            what instances to use for validation, either from
            [train, test, train+test] or directly a list of instances
        repetitions: int
            number of seeds per instance/config-pair to be evaluated
        runhistory: RunHistory
            optional, try to reuse this runhistory and save some runs

        Returns
        -------
        runs: list<_Run>
            list with _Runs
            [_Run(config=CONFIG1,inst=INSTANCE1,seed=SEED1,inst_specs=INST_SPECIFICS1),
             _Run(config=CONFIG2,inst=INSTANCE2,seed=SEED2,inst_specs=INST_SPECIFICS2),
             ...]
        """
        # Get relevant configurations and instances
        if isinstance(configs, str):
            configs = self._get_configs(configs)
        if isinstance(insts, str):
            insts = self._get_instances(insts)

        # If no instances are given, fix the instances to one "None" instance
        if not insts:
            insts = [None]
        # If algorithm is deterministic, fix repetitions to 1
        if self.scen.deterministic and repetitions != 1:
            self.logger.warning(
                "Specified %d repetitions, but fixing to 1, "
                "because algorithm is deterministic.", repetitions)
            repetitions = 1

        # Extract relevant information from given runhistory
        inst_seed_config = self._process_runhistory(configs, insts, runhistory)

        # Now create the actual run-list
        runs = []
        # Counter for runs without the need of recalculation
        runs_from_rh = 0
        # If we reuse runs, we want to return them as well
        new_rh = RunHistory(average_cost)

        for i in sorted(insts):
            for rep in range(repetitions):
                # First, find a seed and add all the data we can take from the
                # given runhistory to "our" validation runhistory.
                configs_evaluated = []
                if runhistory and i in inst_seed_config:
                    # Choose seed based on most often evaluated inst-seed-pair
                    seed, configs_evaluated = inst_seed_config[i].pop(0)
                    # Delete inst if all seeds are used
                    if not inst_seed_config[i]:
                        inst_seed_config.pop(i)
                    # Add runs to runhistory
                    for c in configs_evaluated[:]:
                        runkey = RunKey(runhistory.config_ids[c], i, seed)
                        cost, time, status, additional_info = runhistory.data[
                            runkey]
                        if status in [
                                StatusType.CRASHED, StatusType.ABORT,
                                StatusType.CAPPED
                        ]:
                            # Not properly executed target algorithm runs should be repeated
                            configs_evaluated.remove(c)
                            continue
                        new_rh.add(c,
                                   cost,
                                   time,
                                   status,
                                   instance_id=i,
                                   seed=seed,
                                   additional_info=additional_info)
                        runs_from_rh += 1
                else:
                    # If no runhistory or no entries for instance, get new seed
                    seed = self.rng.randint(MAXINT)

                # We now have a seed and add all configs that are not already
                # evaluated on that seed to the runs-list. This way, we
                # guarantee the same inst-seed-pairs for all configs.
                for config in [
                        c for c in configs if not c in configs_evaluated
                ]:
                    # Only use specifics if specific exists, else use string "0"
                    specs = self.scen.instance_specific[
                        i] if i and i in self.scen.instance_specific else "0"
                    runs.append(
                        _Run(config=config,
                             inst=i,
                             seed=seed,
                             inst_specs=specs))

        self.logger.info(
            "Collected %d runs from %d configurations on %d "
            "instances with %d repetitions. Reusing %d runs from "
            "given runhistory.", len(runs), len(configs), len(insts),
            repetitions, runs_from_rh)

        return runs, new_rh

    def _process_runhistory(self, configs: typing.List[Configuration],
                            insts: typing.List[str], runhistory: RunHistory):
        """
        Processes runhistory from self._get_runs by extracting already evaluated
        (relevant) config-inst-seed tuples.

        Parameters
        ----------
        configs: list(Configuration)
            list of configs of interest
        insts: list(str)
            list of instances of interest
        runhistory: RunHistory
            runhistory to extract runs from

        Returns
        -------
        inst_seed_config: dict<str : list(tuple(int, tuple(configs)))>
            dictionary mapping instances to a list of tuples of already used
            seeds and the configs that this inst-seed-pair has been evaluated
            on, sorted by the number of configs
        """
        # We want to reuse seeds that have been used on most configurations
        # To this end, we create a dictionary as {instances:{seed:[configs]}}
        # Like this we can easily retrieve the most used instance-seed pairs to
        # minimize the number of runs to be evaluated
        inst_seed_config = {}
        if runhistory:
            relevant = dict()
            for key in runhistory.data:
                if (runhistory.ids_config[key.config_id] in configs
                        and key.instance_id in insts):
                    relevant[key] = runhistory.data[key]

            # Change data-structure to {instances:[(seed1, (configs)), (seed2, (configs), ... ]}
            # to make most used seed easily accessible, we sort after length of configs
            for key in relevant:
                inst, seed = key.instance_id, key.seed
                config = runhistory.ids_config[key.config_id]
                if inst in inst_seed_config:
                    if seed in inst_seed_config[inst]:
                        inst_seed_config[inst][seed].append(config)
                    else:
                        inst_seed_config[inst][seed] = [config]
                else:
                    inst_seed_config[inst] = {seed: [config]}

            inst_seed_config = {
                i: sorted([(seed, list(inst_seed_config[i][seed]))
                           for seed in inst_seed_config[i]],
                          key=lambda x: len(x[1]))
                for i in inst_seed_config
            }
        return inst_seed_config

    def _get_configs(self, mode: str) -> typing.List[str]:
        """
        Return desired configs

        Parameters
        ----------
        mode: str
            str from [def, inc, def+inc, wallclock_time, cpu_time, all]
                time evaluates at cpu- or wallclock-timesteps of:
                [max_time/2^0, max_time/2^1, max_time/2^3, ..., default]
                with max_time being the highest recorded time

        Returns
        -------
        configs: list<Configuration>
            list with desired configurations
        """
        # Add desired configs
        configs = []
        mode = mode.lower()
        if mode not in [
                'def', 'inc', 'def+inc', 'wallclock_time', 'cpu_time', 'all'
        ]:
            raise ValueError(
                "%s not a valid option for config_mode in validation." % mode)
        if mode == "def" or mode == "def+inc":
            configs.append(self.scen.cs.get_default_configuration())
        if mode == "inc" or mode == "def+inc":
            configs.append(self.traj[-1]["incumbent"])
        if mode in ["wallclock_time", "cpu_time"]:
            # get highest time-entry and add entries from there
            # not using wallclock_limit in case it's inf
            if (mode == "wallclock_time"
                    and np.isfinite(self.scen.wallclock_limit)):
                max_time = self.scen.wallclock_limit
            elif (mode == "cpu_time"
                  and np.isfinite(self.scen.algo_runs_timelimit)):
                max_time = self.scen.algo_runs_timelimit
            else:
                max_time = self.traj[-1][mode]
            counter = 2**0
            for entry in self.traj[::-1]:
                if (entry[mode] <= max_time / counter
                        and entry["incumbent"] not in configs):
                    configs.append(entry["incumbent"])
                    counter *= 2
            if not self.traj[0]["incumbent"] in configs:
                configs.append(self.traj[0]["incumbent"])  # add first
        if mode == "all":
            for entry in self.traj:
                if not entry["incumbent"] in configs:
                    configs.append(entry["incumbent"])
        self.logger.debug("Gathered %d configurations for mode %s.",
                          len(configs), mode)
        return configs

    def _get_instances(self, mode: str) -> typing.List[str]:
        """
        Get desired instances

        Parameters
        ----------
        mode: str
            what instances to use for validation, from [train, test, train+test]

        Returns
        -------
        instances: list<str>
            instances to be used
        """
        instance_mode = mode.lower()
        if mode not in ['train', 'test', 'train+test']:
            raise ValueError(
                "%s not a valid option for instance_mode in validation." %
                mode)

        # Make sure if instances matter, than instances should be passed
        if ((instance_mode == 'train' and self.scen.train_insts == [None]) or
            (instance_mode == 'test' and self.scen.test_insts == [None])):
            self.logger.warning(
                "Instance mode is set to %s, but there are no "
                "%s-instances specified in the scenario. Setting instance mode to"
                "\"train+test\"!", instance_mode, instance_mode)
            instance_mode = 'train+test'

        instances = []
        if ((instance_mode == 'train' or instance_mode == 'train+test')
                and not self.scen.train_insts == [None]):
            instances.extend(self.scen.train_insts)
        if ((instance_mode == 'test' or instance_mode == 'train+test')
                and not self.scen.test_insts == [None]):
            instances.extend(self.scen.test_insts)
        return instances
Beispiel #4
0
    def validate_epm(
        self,
        config_mode: Union[str, typing.List[Configuration]] = 'def',
        instance_mode: Union[str, typing.List[str]] = 'test',
        repetitions: int = 1,
        runhistory: RunHistory = None,
        output_fn="",
        reuse_epm=True,
    ) -> RunHistory:
        """
        Use EPM to predict costs/runtimes for unknown config/inst-pairs.

        side effect: if output is specified, saves runhistory to specified
        output directory.

        Parameters
        ----------
        output_fn: str
            path to runhistory to be saved. if the suffix is not '.json', will
            be interpreted as directory and filename will be
            'validated_runhistory_EPM.json'
        config_mode: str or list<Configuration>
            string or directly a list of Configuration, string from [def, inc, def+inc, wallclock_time, cpu_time, all].
            time evaluates at cpu- or wallclock-timesteps of:
            [max_time/2^0, max_time/2^1, max_time/2^3, ..., default] with max_time being the highest recorded time
        instance_mode: str or list<str>
            what instances to use for validation, either from
            [train, test, train+test] or directly a list of instances
        repetitions: int
            number of repetitions in nondeterministic algorithms
        runhistory: RunHistory
            optional, RunHistory-object to reuse runs
        reuse_epm: bool
            if true (and if `self.epm`), reuse epm to validate runs

        Returns
        -------
        runhistory: RunHistory
            runhistory with predicted runs
        """
        if not isinstance(runhistory, RunHistory) and (self.epm is None
                                                       or reuse_epm is False):
            raise ValueError(
                "No runhistory specified for validating with EPM!")
        elif reuse_epm is False or self.epm is None:
            # Create RandomForest
            types, bounds = get_types(self.scen.cs, self.scen.feature_array)
            self.epm = RandomForestWithInstances(
                configspace=self.scen.cs,
                types=types,
                bounds=bounds,
                instance_features=self.scen.feature_array,
                seed=self.rng.randint(MAXINT),
                ratio_features=1.0,
            )
            # Use imputor if objective is runtime
            imputor = None
            impute_state = None
            impute_censored_data = False
            if self.scen.run_obj == 'runtime':
                threshold = self.scen.cutoff * self.scen.par_factor
                imputor = RFRImputator(rng=self.rng,
                                       cutoff=self.scen.cutoff,
                                       threshold=threshold,
                                       model=self.epm)
                impute_censored_data = True
                impute_state = [StatusType.CAPPED]
            # Transform training data (from given rh)
            rh2epm = RunHistory2EPM4Cost(
                num_params=len(self.scen.cs.get_hyperparameters()),
                scenario=self.scen,
                rng=self.rng,
                impute_censored_data=impute_censored_data,
                imputor=imputor,
                impute_state=impute_state)
            X, y = rh2epm.transform(runhistory)
            self.logger.debug("Training model with data of shape X: %s, y:%s",
                              str(X.shape), str(y.shape))
            # Train random forest
            self.epm.train(X, y)

        # Predict desired runs
        runs, rh_epm = self._get_runs(config_mode, instance_mode, repetitions,
                                      runhistory)

        feature_array_size = len(self.scen.cs.get_hyperparameters())
        if self.scen.feature_array is not None:
            feature_array_size += self.scen.feature_array.shape[1]

        X_pred = np.empty((len(runs), feature_array_size))
        for idx, run in enumerate(runs):
            if self.scen.feature_array is not None and run.inst is not None:
                X_pred[idx] = np.hstack([
                    convert_configurations_to_array([run.config])[0],
                    self.scen.feature_dict[run.inst]
                ])
            else:
                X_pred[idx] = convert_configurations_to_array([run.config])[0]
        self.logger.debug("Predicting desired %d runs, data has shape %s",
                          len(runs), str(X_pred.shape))

        y_pred = self.epm.predict(X_pred)

        # Add runs to runhistory
        for run, pred in zip(runs, y_pred[0]):
            rh_epm.add(
                config=run.config,
                cost=float(pred),
                time=float(pred),
                status=StatusType.SUCCESS,
                instance_id=run.inst,
                seed=-1,
                additional_info={"additional_info": "ESTIMATED USING EPM!"})

        if output_fn:
            self._save_results(rh_epm,
                               output_fn,
                               backup_fn="validated_runhistory_EPM.json")
        return rh_epm
Beispiel #5
0
    def _component_builder(self, conf: typing.Union[Configuration, dict]) \
            -> typing.Tuple[AbstractAcquisitionFunction, AbstractEPM]:
        """
            builds new Acquisition function object
            and EPM object and returns these

            Parameters
            ----------
            conf: typing.Union[Configuration, dict]
                configuration specificing "model" and "acq_func"

            Returns
            -------
            typing.Tuple[AbstractAcquisitionFunction, AbstractEPM]

        """
        types, bounds = get_types(
            self.config_space, instance_features=self.scenario.feature_array)

        if conf["model"] == "RF":
            model = RandomForestWithInstances(
                configspace=self.config_space,
                types=types,
                bounds=bounds,
                instance_features=self.scenario.feature_array,
                seed=self.rng.randint(MAXINT),
                pca_components=conf.get("pca_dim", self.scenario.PCA_DIM),
                log_y=conf.get("log_y", self.scenario.transform_y
                               in ["LOG", "LOGS"]),
                num_trees=conf.get("num_trees", self.scenario.rf_num_trees),
                do_bootstrapping=conf.get("do_bootstrapping",
                                          self.scenario.rf_do_bootstrapping),
                ratio_features=conf.get("ratio_features",
                                        self.scenario.rf_ratio_features),
                min_samples_split=conf.get("min_samples_split",
                                           self.scenario.rf_min_samples_split),
                min_samples_leaf=conf.get("min_samples_leaf",
                                          self.scenario.rf_min_samples_leaf),
                max_depth=conf.get("max_depth", self.scenario.rf_max_depth),
            )

        elif conf["model"] == "GP":
            from dsmac.epm.gp_kernels import ConstantKernel, HammingKernel, WhiteKernel, Matern

            cov_amp = ConstantKernel(
                2.0,
                constant_value_bounds=(np.exp(-10), np.exp(2)),
                prior=LognormalPrior(mean=0.0, sigma=1.0, rng=self.rng),
            )

            cont_dims = np.nonzero(types == 0)[0]
            cat_dims = np.nonzero(types != 0)[0]

            if len(cont_dims) > 0:
                exp_kernel = Matern(
                    np.ones([len(cont_dims)]),
                    [(np.exp(-10), np.exp(2)) for _ in range(len(cont_dims))],
                    nu=2.5,
                    operate_on=cont_dims,
                )

            if len(cat_dims) > 0:
                ham_kernel = HammingKernel(
                    np.ones([len(cat_dims)]),
                    [(np.exp(-10), np.exp(2)) for _ in range(len(cat_dims))],
                    operate_on=cat_dims,
                )
            noise_kernel = WhiteKernel(
                noise_level=1e-8,
                noise_level_bounds=(np.exp(-25), np.exp(2)),
                prior=HorseshoePrior(scale=0.1, rng=self.rng),
            )

            if len(cont_dims) > 0 and len(cat_dims) > 0:
                # both
                kernel = cov_amp * (exp_kernel * ham_kernel) + noise_kernel
            elif len(cont_dims) > 0 and len(cat_dims) == 0:
                # only cont
                kernel = cov_amp * exp_kernel + noise_kernel
            elif len(cont_dims) == 0 and len(cat_dims) > 0:
                # only cont
                kernel = cov_amp * ham_kernel + noise_kernel
            else:
                raise ValueError()

            n_mcmc_walkers = 3 * len(kernel.theta)
            if n_mcmc_walkers % 2 == 1:
                n_mcmc_walkers += 1

            model = GaussianProcessMCMC(
                self.config_space,
                types=types,
                bounds=bounds,
                kernel=kernel,
                n_mcmc_walkers=n_mcmc_walkers,
                chain_length=250,
                burnin_steps=250,
                normalize_y=True,
                seed=self.rng.randint(low=0, high=10000),
            )

        if conf["acq_func"] == "EI":
            acq = EI(model=model, par=conf.get("par_ei", 0))
        elif conf["acq_func"] == "LCB":
            acq = LCB(model=model, par=conf.get("par_lcb", 0))
        elif conf["acq_func"] == "PI":
            acq = PI(model=model, par=conf.get("par_pi", 0))
        elif conf["acq_func"] == "LogEI":
            # par value should be in log-space
            acq = LogEI(model=model, par=conf.get("par_logei", 0))

        return acq, model
Beispiel #6
0
    def __init__(
            self,
            scenario: Scenario,
            # TODO: once we drop python3.4 add type hint
            # typing.Union[ExecuteTARun, callable]
            tae_runner=None,
            runhistory: RunHistory = None,
            intensifier: Intensifier = None,
            acquisition_function: AbstractAcquisitionFunction = None,
            model: AbstractEPM = None,
            runhistory2epm: AbstractRunHistory2EPM = None,
            initial_design: InitialDesign = None,
            initial_configurations: typing.List[Configuration] = None,
            stats: Stats = None,
            rng: np.random.RandomState = None,
            run_id: int = 1):
        """Constructor"""
        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

        aggregate_func = average_cost
        self.runhistory = None
        self.trajectory = None

        # initialize stats object
        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario, file_system=scenario.file_system)

        self.output_dir = create_output_directory(scenario, run_id)
        scenario.write()

        # initialize empty runhistory
        if runhistory is None:
            runhistory = RunHistory(aggregate_func=aggregate_func,
                                    file_system=scenario.file_system)
        # inject aggr_func if necessary
        if runhistory.aggregate_func is None:
            runhistory.aggregate_func = aggregate_func

        # initial random number generator
        num_run, rng = self._get_rng(rng=rng)

        # reset random number generator in config space to draw different
        # random configurations with each seed given to SMAC
        scenario.cs.seed(rng.randint(MAXINT))

        # initial Trajectory Logger
        traj_logger = TrajLogger(output_dir=self.output_dir,
                                 stats=self.stats,
                                 file_system=scenario.file_system)

        # initial EPM
        types, bounds = get_types(scenario.cs, scenario.feature_array)
        if model is None:
            model = RandomForestWithInstances(
                configspace=scenario.cs,
                types=types,
                bounds=bounds,
                instance_features=scenario.feature_array,
                seed=rng.randint(MAXINT),
                pca_components=scenario.PCA_DIM,
                num_trees=scenario.rf_num_trees,
                do_bootstrapping=scenario.rf_do_bootstrapping,
                ratio_features=scenario.rf_ratio_features,
                min_samples_split=scenario.rf_min_samples_split,
                min_samples_leaf=scenario.rf_min_samples_leaf,
                max_depth=scenario.rf_max_depth,
            )
        # initial acquisition function
        if acquisition_function is None:
            if scenario.run_obj == "runtime":
                acquisition_function = LogEI(model=model)
            else:
                acquisition_function = EI(model=model)
        # inject model if necessary
        if acquisition_function.model is None:
            acquisition_function.model = model

        # initialize optimizer on acquisition function
        local_search = LocalSearch(
            acquisition_function,
            scenario.cs,
            max_steps=scenario.sls_max_steps,
            n_steps_plateau_walk=scenario.sls_n_steps_plateau_walk)

        # initialize tae_runner
        # First case, if tae_runner is None, the target algorithm is a call
        # string in the scenario file
        if tae_runner is None:
            tae_runner = ExecuteTARunOld(
                ta=scenario.ta,
                stats=self.stats,
                run_obj=scenario.run_obj,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Second case, the tae_runner is a function to be optimized
        elif callable(tae_runner):
            tae_runner = ExecuteTAFuncDict(
                ta=tae_runner,
                stats=self.stats,
                run_obj=scenario.run_obj,
                memory_limit=scenario.memory_limit,
                runhistory=runhistory,
                par_factor=scenario.par_factor,
                cost_for_crash=scenario.cost_for_crash)
        # Third case, if it is an ExecuteTaRun we can simply use the
        # instance. Otherwise, the next check raises an exception
        elif not isinstance(tae_runner, ExecuteTARun):
            raise TypeError("Argument 'tae_runner' is %s, but must be "
                            "either a callable or an instance of "
                            "ExecuteTaRun. Passing 'None' will result in the "
                            "creation of target algorithm runner based on the "
                            "call string in the scenario file." %
                            type(tae_runner))

        # Check that overall objective and tae objective are the same
        if tae_runner.run_obj != scenario.run_obj:
            raise ValueError("Objective for the target algorithm runner and "
                             "the scenario must be the same, but are '%s' and "
                             "'%s'" % (tae_runner.run_obj, scenario.run_obj))

        # inject stats if necessary
        if tae_runner.stats is None:
            tae_runner.stats = self.stats
        # inject runhistory if necessary
        if tae_runner.runhistory is None:
            tae_runner.runhistory = runhistory
        # inject cost_for_crash
        if tae_runner.crash_cost != scenario.cost_for_crash:
            tae_runner.crash_cost = scenario.cost_for_crash

        # initialize intensification
        if intensifier is None:
            intensifier = Intensifier(
                tae_runner=tae_runner,
                stats=self.stats,
                traj_logger=traj_logger,
                rng=rng,
                instances=scenario.train_insts,
                cutoff=scenario.cutoff,
                deterministic=scenario.deterministic,
                run_obj_time=scenario.run_obj == "runtime",
                always_race_against=scenario.cs.get_default_configuration()
                if scenario.always_race_default else None,
                instance_specifics=scenario.instance_specific,
                minR=scenario.minR,
                maxR=scenario.maxR,
                adaptive_capping_slackfactor=scenario.
                intens_adaptive_capping_slackfactor,
                min_chall=scenario.intens_min_chall,
                distributer=scenario.distributer)
        # inject deps if necessary
        if intensifier.tae_runner is None:
            intensifier.tae_runner = tae_runner
        if intensifier.stats is None:
            intensifier.stats = self.stats
        if intensifier.traj_logger is None:
            intensifier.traj_logger = traj_logger

        # initial design
        if initial_design is not None and initial_configurations is not None:
            raise ValueError(
                "Either use initial_design or initial_configurations; but not both"
            )

        if initial_configurations is not None:
            initial_design = InitialDesign(tae_runner=tae_runner,
                                           scenario=scenario,
                                           stats=self.stats,
                                           traj_logger=traj_logger,
                                           runhistory=runhistory,
                                           rng=rng,
                                           configs=initial_configurations,
                                           intensifier=intensifier,
                                           aggregate_func=aggregate_func)
        elif initial_design is None:
            if scenario.initial_incumbent == "DEFAULT":
                initial_design = DefaultConfiguration(
                    tae_runner=tae_runner,
                    scenario=scenario,
                    stats=self.stats,
                    traj_logger=traj_logger,
                    runhistory=runhistory,
                    rng=rng,
                    intensifier=intensifier,
                    aggregate_func=aggregate_func,
                    max_config_fracs=0.0)
            elif scenario.initial_incumbent == "RANDOM":
                initial_design = RandomConfigurations(
                    tae_runner=tae_runner,
                    scenario=scenario,
                    stats=self.stats,
                    traj_logger=traj_logger,
                    runhistory=runhistory,
                    rng=rng,
                    intensifier=intensifier,
                    aggregate_func=aggregate_func,
                    max_config_fracs=0.0)
            else:
                raise ValueError("Don't know what kind of initial_incumbent "
                                 "'%s' is" % scenario.initial_incumbent)
        # inject deps if necessary
        if initial_design.tae_runner is None:
            initial_design.tae_runner = tae_runner
        if initial_design.scenario is None:
            initial_design.scenario = scenario
        if initial_design.stats is None:
            initial_design.stats = self.stats
        if initial_design.traj_logger is None:
            initial_design.traj_logger = traj_logger

        # initial conversion of runhistory into EPM data
        if runhistory2epm is None:

            num_params = len(scenario.cs.get_hyperparameters())
            if scenario.run_obj == "runtime":

                # if we log the performance data,
                # the RFRImputator will already get
                # log transform data from the runhistory
                cutoff = np.log(scenario.cutoff)
                threshold = np.log(scenario.cutoff * scenario.par_factor)

                imputor = RFRImputator(rng=rng,
                                       cutoff=cutoff,
                                       threshold=threshold,
                                       model=model,
                                       change_threshold=0.01,
                                       max_iter=2)

                runhistory2epm = RunHistory2EPM4LogCost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=True,
                    impute_state=[
                        StatusType.CAPPED,
                    ],
                    imputor=imputor)

            elif scenario.run_obj == 'quality':
                runhistory2epm = RunHistory2EPM4Cost(
                    scenario=scenario,
                    num_params=num_params,
                    success_states=[
                        StatusType.SUCCESS,
                    ],
                    impute_censored_data=False,
                    impute_state=None)

            else:
                raise ValueError('Unknown run objective: %s. Should be either '
                                 'quality or runtime.' % self.scenario.run_obj)

        # inject scenario if necessary:
        if runhistory2epm.scenario is None:
            runhistory2epm.scenario = scenario

        self.solver = EPILS_Solver(scenario=scenario,
                                   stats=self.stats,
                                   initial_design=initial_design,
                                   runhistory=runhistory,
                                   runhistory2epm=runhistory2epm,
                                   intensifier=intensifier,
                                   aggregate_func=aggregate_func,
                                   num_run=num_run,
                                   model=model,
                                   acq_optimizer=local_search,
                                   acquisition_func=acquisition_function,
                                   rng=rng)