Example #1
    def test_multiple_budgets(self):

        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs, values={"a": 1, "b": 2})

        rh.add(
            config=config1,
            cost=[10, 50],
            time=20,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
            budget=1,
        )

        self.assertEqual(rh.get_cost(config1), 1.0)

        # Only the run with the highest budget is counted in the config cost.
        # However, we expect both runs to update the normalization bounds.
        rh.add(
            config=config1,
            cost=[20, 25],
            time=25,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
            budget=5,
        )

        self.assertEqual(rh.get_cost(config1), 0.5)
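
The final assertion is consistent with per-objective min-max normalization over all observed costs: only the budget-5 run [20, 25] is counted toward the cost, but the budget-1 run still widens the bounds. A quick check of that arithmetic; the normalization scheme here is inferred from the asserted values, not from RunHistory internals.

obj1_vals, obj2_vals = [10, 20], [50, 25]  # all observed per-objective costs

def norm(v, vals):
    # Min-max normalize v against the observed bounds of its objective.
    return (v - min(vals)) / (max(vals) - min(vals))

# Only the highest-budget run [20, 25] contributes to the config cost.
print((norm(20, obj1_vals) + norm(25, obj2_vals)) / 2)  # 0.5
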
Example #2
    def test_full_update(self):
        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs,
                                values={'a': 1, 'b': 2})
        config2 = Configuration(cs,
                                values={'a': 1, 'b': 3})
        rh.add(config=config1, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=1,
               seed=1)

        rh.add(config=config2, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=1,
               seed=1)

        rh.add(config=config2, cost=20, time=20,
               status=StatusType.SUCCESS, instance_id=2,
               seed=2)

        cost_config2 = rh.get_cost(config2)

        rh.compute_all_costs()
        updated_cost_config2 = rh.get_cost(config2)
        self.assertEqual(cost_config2, updated_cost_config2)

        rh.compute_all_costs(instances=[2])
        updated_cost_config2 = rh.get_cost(config2)
        self.assertNotEqual(cost_config2, updated_cost_config2)
        self.assertEqual(updated_cost_config2, 20)
Example #3
    def test_incremental_update(self):

        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs, values={"a": 1, "b": 2})

        rh.add(
            config=config1,
            cost=10,
            time=20,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
        )

        self.assertEqual(rh.get_cost(config1), 10)

        rh.add(
            config=config1,
            cost=20,
            time=20,
            status=StatusType.SUCCESS,
            instance_id=2,
            seed=1,
        )

        self.assertEqual(rh.get_cost(config1), 15)
Example #4
    def test_multiple_budgets(self):

        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs, values={'a': 1, 'b': 2})

        rh.add(config=config1,
               cost=10,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1,
               budget=1)

        self.assertEqual(rh.get_cost(config1), 10)

        # Only the run with the highest budget is counted in the config cost
        rh.add(config=config1,
               cost=20,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1,
               budget=2)

        self.assertEqual(rh.get_cost(config1), 20)
        self.assertEqual(rh.get_min_cost(config1), 10)
Example #5
    def test_full_update(self):
        rh = RunHistory(overwrite_existing_runs=True)
        cs = get_config_space()
        config1 = Configuration(cs, values={"a": 1, "b": 2})
        config2 = Configuration(cs, values={"a": 1, "b": 3})
        rh.add(
            config=config1,
            cost=[10, 40],
            time=20,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
        )

        rh.add(
            config=config1,
            cost=[0, 100],
            time=20,
            status=StatusType.SUCCESS,
            instance_id=2,
            seed=2,
        )

        rh.add(
            config=config2,
            cost=[10, 40],
            time=20,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
        )

        rh.add(
            config=config2,
            cost=[20, 80],
            time=20,
            status=StatusType.SUCCESS,
            instance_id=2,
            seed=2,
        )

        cost_config2 = rh.get_cost(config2)

        rh.compute_all_costs()
        updated_cost_config2 = rh.get_cost(config2)

        self.assertEqual(cost_config2, updated_cost_config2)

        rh.compute_all_costs(instances=[2])
        updated_cost_config2 = rh.get_cost(config2)

        self.assertAlmostEqual(updated_cost_config2, 0.833, places=3)
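
The asserted 0.833 likewise follows if each objective is min-max normalized over every cost added above (objective 1 spans [0, 20], objective 2 spans [40, 100]) and config2's run on instance 2 is averaged across objectives. A small check under that assumed scheme:

obj1_vals = [10, 0, 10, 20]    # first entries of every cost vector added above
obj2_vals = [40, 100, 40, 80]  # second entries

def norm(v, vals):
    # Min-max normalize v against the observed bounds of its objective.
    return (v - min(vals)) / (max(vals) - min(vals))

# config2's run on instance 2 had cost [20, 80]
print(round((norm(20, obj1_vals) + norm(80, obj2_vals)) / 2, 3))  # 0.833
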
Example #6
    def test_multi_config_design(self):
        stats = Stats(scenario=self.scenario)
        stats.start_timing()
        self.ta.stats = stats
        tj = TrajLogger(output_dir=None, stats=stats)
        rh = RunHistory(aggregate_func=average_cost)
        self.ta.runhistory = rh
        rng = np.random.RandomState(seed=12345)

        intensifier = Intensifier(tae_runner=self.ta,
                                  stats=stats,
                                  traj_logger=tj,
                                  rng=rng,
                                  instances=[None],
                                  run_obj_time=False)

        configs = [
            Configuration(configuration_space=self.cs, values={"x1": 4}),
            Configuration(configuration_space=self.cs, values={"x1": 2})
        ]
        dc = MultiConfigInitialDesign(tae_runner=self.ta,
                                      scenario=self.scenario,
                                      stats=stats,
                                      traj_logger=tj,
                                      runhistory=rh,
                                      rng=rng,
                                      configs=configs,
                                      intensifier=intensifier,
                                      aggregate_func=average_cost)

        inc = dc.run()
        self.assertEqual(stats.ta_runs, 2)
        self.assertEqual(len(rh.data), 2)
        self.assertEqual(rh.get_cost(inc), 4)
Example #7
    def process_results(self,
                        run_info: RunInfo,
                        incumbent: typing.Optional[Configuration],
                        run_history: RunHistory,
                        time_bound: float,
                        result: RunValue,
                        log_traj: bool = True,
                        ) -> \
            typing.Tuple[Configuration, float]:
        """
        The intensifier stage will be updated based on the results/status
        of a configuration execution.
        Also, an incumbent will be determined.

        Parameters
        ----------
        run_info : RunInfo
               A RunInfo containing the configuration that was evaluated
        incumbent : typing.Optional[Configuration]
            Best configuration seen so far
        run_history : RunHistory
            stores all runs we ran so far
        time_bound : float
            time in [sec] available to perform intensify
        result: RunValue
            Contains the result (status and other metadata) of exercising
            a challenger/incumbent.
        log_traj: bool
            Whether to log changes of incumbents in trajectory

        Returns
        -------
        incumbent: Configuration()
            current (maybe new) incumbent configuration
        inc_perf: float
            empirical performance of incumbent configuration
        """
        # Mark the fact that we processed this configuration
        self.run_tracker[(run_info.config, run_info.instance, run_info.seed,
                          run_info.budget)] = True

        # If the incumbent is None, we use the challenger
        if not incumbent:
            self.logger.info(
                "First run, no incumbent provided; challenger is assumed to be the incumbent"
            )
            incumbent = run_info.config

        self.num_run += 1

        incumbent = self._compare_configs(challenger=run_info.config,
                                          incumbent=incumbent,
                                          run_history=run_history,
                                          log_traj=log_traj)
        # get incumbent cost
        inc_perf = run_history.get_cost(incumbent)

        return incumbent, inc_perf
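
The incumbent rule documented above (adopt the challenger on the very first run, otherwise keep whichever configuration performs better) can be shown in isolation. Below is a toy sketch with strings and a made-up cost table in place of Configuration objects and a RunHistory; the real _compare_configs additionally restricts the comparison to instance-seed pairs both configurations have run on.

def update_incumbent(incumbent, challenger, costs):
    # First run: no incumbent yet, so the evaluated challenger is adopted.
    if incumbent is None:
        return challenger
    # Otherwise keep the configuration with the lower empirical cost.
    return challenger if costs[challenger] < costs[incumbent] else incumbent

costs = {"default": 4.0, "candidate": 2.5}  # hypothetical costs
inc = update_incumbent(None, "default", costs)   # first run -> "default"
inc = update_incumbent(inc, "candidate", costs)  # cheaper -> "candidate"
print(inc)
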
Example #8
    def test_incremental_update(self):

        rh = RunHistory(aggregate_func=average_cost)
        cs = get_config_space()
        config1 = Configuration(cs,
                                values={'a': 1, 'b': 2})

        rh.add(config=config1, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=1,
               seed=1)

        self.assertEqual(rh.get_cost(config1), 10)

        rh.add(config=config1, cost=20, time=20,
               status=StatusType.SUCCESS, instance_id=2,
               seed=1)

        self.assertEqual(rh.get_cost(config1), 15)
Example #9
    def test_merge_foreign_data(self):
        ''' test smac.utils.merge_foreign_data '''

        scenario = Scenario(self.test_scenario_dict)
        scenario_2 = Scenario(self.test_scenario_dict)
        scenario_2.feature_dict = {"inst_new": [4]}

        # init cs
        cs = ConfigurationSpace()
        cs.add_hyperparameter(UniformIntegerHyperparameter(name='a',
                                                           lower=0,
                                                           upper=100))
        cs.add_hyperparameter(UniformIntegerHyperparameter(name='b',
                                                           lower=0,
                                                           upper=100))
        # build runhistory
        rh_merge = RunHistory()
        config = Configuration(cs, values={'a': 1, 'b': 2})

        rh_merge.add(config=config, instance_id="inst_new", cost=10, time=20,
                     status=StatusType.SUCCESS,
                     seed=None,
                     additional_info=None)

        # "d" is an instance in <scenario>
        rh_merge.add(config=config, instance_id="d", cost=5, time=20,
                     status=StatusType.SUCCESS,
                     seed=None,
                     additional_info=None)

        # build empty rh
        rh_base = RunHistory()

        merge_foreign_data(scenario=scenario, runhistory=rh_base,
                           in_scenario_list=[scenario_2], in_runhistory_list=[rh_merge])

        # both runs should be in the runhistory
        # but we should not use the data to update the cost of config
        self.assertEqual(len(rh_base.data), 2)
        self.assertTrue(np.isnan(rh_base.get_cost(config)))

        # we should not get direct access to external run data
        runs = rh_base.get_runs_for_config(config, only_max_observed_budget=True)
        self.assertEqual(len(runs), 0)

        rh_merge.add(config=config, instance_id="inst_new_2", cost=10, time=20,
                     status=StatusType.SUCCESS,
                     seed=None,
                     additional_info=None)

        self.assertRaises(ValueError, merge_foreign_data, **{
                          "scenario": scenario, "runhistory": rh_base,
                          "in_scenario_list": [scenario_2], "in_runhistory_list": [rh_merge]})
Example #10
    def test_incremental_update(self):

        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs, values={"a": 1, "b": 2})

        rh.add(
            config=config1,
            cost=[10, 100],
            time=20,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
        )

        self.assertEqual(rh.get_cost(config1), 1.0)

        rh.add(
            config=config1,
            cost=[20, 50],
            time=20,
            status=StatusType.SUCCESS,
            instance_id=2,
            seed=1,
        )

        # We expect 0.75 because of the moving average:
        # first we have 1.0 and then 0.5, so the moving average is 0.75
        self.assertEqual(rh.get_cost(config1), 0.75)

        rh.add(
            config=config1,
            cost=[0, 100],
            time=20,
            status=StatusType.SUCCESS,
            instance_id=3,
            seed=1,
        )

        self.assertAlmostEqual(rh.get_cost(config1), 0.694, places=3)
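
The comments above describe the aggregation as a moving average over per-run normalized costs. A minimal sketch of the running-mean idea, which reproduces the first two asserted values (1.0, then 0.75); the final 0.694 additionally depends on how RunHistory rescales earlier costs when the normalization bounds widen, which this sketch does not model.

def running_mean(values):
    # Incrementally updated average, yielding the mean after each new value.
    mean = 0.0
    for n, v in enumerate(values, start=1):
        mean += (v - mean) / n
        yield mean

print(list(running_mean([1.0, 0.5])))  # [1.0, 0.75]
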
Example #11
    def _process_inc_run(
        self,
        incumbent: Configuration,
        run_history: RunHistory,
        log_traj: bool = True,
    ) -> None:
        """Method to process the results of a challenger that races
        an incumbent

        Parameters
        ----------
        incumbent: Configuration
            Either challenger or incumbent
        run_history : RunHistory
            stores all runs we ran so far
        log_traj: bool
            Whether to log changes of incumbents in trajectory

        """
        # output estimated performance of incumbent
        inc_runs = run_history.get_runs_for_config(
            incumbent, only_max_observed_budget=True)
        inc_perf = run_history.get_cost(incumbent)
        format_value = format_array(inc_perf)
        self.logger.info(
            f"Updated estimated cost of incumbent on {len(inc_runs)} runs: {format_value}"
        )

        # if running first configuration, go to next stage after 1st run
        if self.stage in [
                IntensifierStage.RUN_FIRST_CONFIG,
                IntensifierStage.PROCESS_FIRST_CONFIG_RUN
        ]:
            self.stage = IntensifierStage.RUN_INCUMBENT
            self._next_iteration()
        else:
            # Termination condition; after each run, this checks
            # whether further runs are necessary due to minR
            if len(inc_runs) >= self.minR or len(inc_runs) >= self.maxR:
                self.stage = IntensifierStage.RUN_CHALLENGER
            else:
                self.stage = IntensifierStage.RUN_INCUMBENT

        self._compare_configs(incumbent=incumbent,
                              challenger=incumbent,
                              run_history=run_history,
                              log_traj=log_traj)
Example #12
    def _top_k(self, configs: typing.List[Configuration],
               run_history: RunHistory, k: int) -> typing.List[Configuration]:
        """
        Selects the top 'k' configurations from the given list based on their performance.

        This retrieves the performance for each configuration from the runhistory and checks
        that the highest budget they've been evaluated on is the same for each of the configurations.

        Parameters
        ----------
        configs: typing.List[Configuration]
            list of configurations to filter from
        run_history: smac.runhistory.runhistory.RunHistory
            stores all runs we ran so far
        k: int
            number of configurations to select

        Returns
        -------
        typing.List[Configuration]
            top challenger configurations, sorted in increasing costs
        """
        # extracting costs for each given configuration
        config_costs = {}
        # sample list instance-seed-budget key to act as base
        run_key = run_history.get_runs_for_config(
            configs[0], only_max_observed_budget=True)
        for c in configs:
            # ensuring that all configurations being compared are run on the same set of instance, seed & budget
            cur_run_key = run_history.get_runs_for_config(
                c, only_max_observed_budget=True)
            if cur_run_key != run_key:
                raise ValueError(
                    'Cannot compare configs that were run on different instances-seeds-budgets: %s vs %s'
                    % (run_key, cur_run_key))
            config_costs[c] = run_history.get_cost(c)

        configs_sorted = sorted(config_costs, key=config_costs.get)
        # select top configurations only
        top_configs = configs_sorted[:k]
        return top_configs
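
Stripped of the RunHistory bookkeeping, the selection at the end of _top_k is a plain sort-and-slice over a cost dictionary. A self-contained illustration with hypothetical configuration names and costs:

config_costs = {"cfg_a": 0.42, "cfg_b": 0.17, "cfg_c": 0.99}  # made-up costs

# Sort keys by their cost (ascending) and keep the k cheapest.
k = 2
top_configs = sorted(config_costs, key=config_costs.get)[:k]
print(top_configs)  # ['cfg_b', 'cfg_a']
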
Example #13
    def run_smbo(self, max_iters=1000):
        global evaluator

        # == first things first: load the datamanager
        self.reset_data_manager()

        # == Initialize SMBO stuff
        # first create a scenario
        seed = self.seed  # TODO
        num_params = len(self.config_space.get_hyperparameters())
        # allocate a run history
        run_history = RunHistory()
        meta_runhistory = RunHistory()
        meta_runs_dataset_indices = {}
        num_run = self.start_num_run
        instance_id = self.dataset_name + SENTINEL

        # == Train on subset
        #    before doing anything, let us run the default_cfg
        #    on a subset of the available data to ensure that
        #    we at least have some models
        #    we will try two different ratios of decreasing magnitude
        #    in the hope that at least on the last one we will be able
        #    to get a model
        n_data = self.datamanager.data['X_train'].shape[0]
        subset_ratio = 10000. / n_data
        if subset_ratio >= 0.5:
            subset_ratio = 0.33
            subset_ratios = [subset_ratio, subset_ratio * 0.10]
        else:
            subset_ratios = [subset_ratio, 500. / n_data]
        self.logger.info("Training default configurations on a subset of "
                         "%d/%d data points." %
                         (int(n_data * subset_ratio), n_data))

        # the time limit for these function evaluations is rigorously
        # set to only 1/2 of a full function evaluation
        subset_time_limit = max(5, int(self.func_eval_time_limit / 2))
        # the configs we want to run on the data subset are:
        # 1) the default configs
        # 2) a set of configs we selected for training on a subset
        subset_configs = [self.config_space.get_default_configuration()] \
                          + self.collect_additional_subset_defaults()
        subset_config_succesful = [False] * len(subset_configs)
        for subset_config_id, next_config in enumerate(subset_configs):
            for i, ratio in enumerate(subset_ratios):
                self.reset_data_manager()
                n_data_subsample = int(n_data * ratio)

                # run the config, but throw away the result afterwards
                # since this cfg was evaluated only on a subset
                # and we don't want to confuse SMAC
                self.logger.info(
                    "Starting to evaluate %d on SUBSET "
                    "with size %d and time limit %ds.", num_run,
                    n_data_subsample, subset_time_limit)
                self.logger.info(next_config)
                _info = eval_with_limits(self.datamanager, self.tmp_dir,
                                         next_config, seed, num_run,
                                         self.resampling_strategy,
                                         self.resampling_strategy_args,
                                         self.memory_limit, subset_time_limit,
                                         n_data_subsample)
                (duration, result, _, additional_run_info, status) = _info
                self.logger.info(
                    "Finished evaluating %d. configuration on SUBSET. "
                    "Duration %f; loss %f; status %s; additional run "
                    "info: %s ", num_run, duration, result, str(status),
                    additional_run_info)

                num_run += 1
                if i < len(subset_ratios) - 1:
                    if status != StatusType.SUCCESS:
                        # Do not increase num_run here, because we will try
                        # the same configuration with less data
                        self.logger.info(
                            "A CONFIG did not finish "
                            "for subset ratio %f -> going smaller", ratio)
                        continue
                    else:
                        self.logger.info(
                            "Finished SUBSET training successfully "
                            "with ratio %f", ratio)
                        subset_config_succesful[subset_config_id] = True
                        break
                else:
                    if status != StatusType.SUCCESS:
                        self.logger.info(
                            "A CONFIG did not finish "
                            "for subset ratio %f.", ratio)
                        continue
                    else:
                        self.logger.info(
                            "Finished SUBSET training successfully "
                            "with ratio %f", ratio)
                        subset_config_succesful[subset_config_id] = True
                        break

        # Use the first non-failing configuration from the subsets as the new
        #  default configuration -> this guards us against the random forest
        # failing on large, sparse datasets
        default_cfg = None
        for subset_config_id, next_config in enumerate(subset_configs):
            if subset_config_succesful[subset_config_id]:
                default_cfg = next_config
                break
        if default_cfg is None:
            default_cfg = self.config_space.get_default_configuration()

        # == METALEARNING suggestions
        # we start by evaluating the defaults on the full dataset again
        # and add the suggestions from metalearning behind it

        if self.metadata_directory is None:
            metalearning_directory = os.path.dirname(
                autosklearn.metalearning.__file__)
            # There is no multilabel data in OpenML
            if self.task == MULTILABEL_CLASSIFICATION:
                meta_task = BINARY_CLASSIFICATION
            else:
                meta_task = self.task
            metadata_directory = os.path.join(
                metalearning_directory, 'files', '%s_%s_%s' %
                (METRIC_TO_STRING[self.metric],
                 TASK_TYPES_TO_STRING[meta_task],
                 'sparse' if self.datamanager.info['is_sparse'] else 'dense'))
            self.metadata_directory = metadata_directory

        self.logger.info('Metadata directory: %s', self.metadata_directory)
        meta_base = MetaBase(self.config_space, self.metadata_directory)

        metafeature_calculation_time_limit = int(self.total_walltime_limit / 4)
        metafeature_calculation_start_time = time.time()
        meta_features = self._calculate_metafeatures_with_limits(
            metafeature_calculation_time_limit)
        metafeature_calculation_end_time = time.time()
        metafeature_calculation_time_limit -= (
            metafeature_calculation_end_time -
            metafeature_calculation_start_time)

        if metafeature_calculation_time_limit < 1:
            self.logger.warning(
                'Time limit for metafeature calculation less '
                'than 1 second (%f). Skipping calculation '
                'of metafeatures for encoded dataset.',
                metafeature_calculation_time_limit)
            meta_features_encoded = None
        else:
            self.datamanager.perform1HotEncoding()
            meta_features_encoded = \
                self._calculate_metafeatures_encoded_with_limits(
                    metafeature_calculation_time_limit)

        # In case there is a problem calculating the encoded meta-features
        if meta_features is None:
            if meta_features_encoded is not None:
                meta_features = meta_features_encoded
        else:
            if meta_features_encoded is not None:
                meta_features.metafeature_values.update(
                    meta_features_encoded.metafeature_values)

        if meta_features is not None:
            meta_base.add_dataset(instance_id, meta_features)
            # Do mean imputation of the meta-features - should be done
            # specifically for each prediction model!
            all_metafeatures = meta_base.get_metafeatures(
                features=list(meta_features.keys()))
            all_metafeatures.fillna(all_metafeatures.mean(), inplace=True)

            metalearning_configurations = self.collect_metalearning_suggestions(
                meta_base)
            if metalearning_configurations is None:
                metalearning_configurations = []
            self.reset_data_manager()

            self.logger.info('%s', meta_features)

            # Convert meta-features into a dictionary because the scenario
            # expects a dictionary
            meta_features_dict = {}
            for dataset, series in all_metafeatures.iterrows():
                meta_features_dict[dataset] = series.values
            meta_features_list = []
            for meta_feature_name in all_metafeatures.columns:
                meta_features_list.append(
                    meta_features[meta_feature_name].value)
            meta_features_list = np.array(meta_features_list).reshape((1, -1))
            self.logger.info(list(meta_features_dict.keys()))

            meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
            meta_runs_index = 0
            try:
                meta_durations = meta_base.get_all_runs('runtime')
                read_runtime_data = True
            except KeyError:
                read_runtime_data = False
                self.logger.critical('Cannot read runtime data.')
                if self.acquisition_function == 'EIPS':
                    self.logger.critical(
                        'Reverting to acquisition function EI!')
                    self.acquisition_function = 'EI'

            for meta_dataset in meta_runs.index:
                meta_dataset_start_index = meta_runs_index
                for meta_configuration in meta_runs.columns:
                    if np.isfinite(meta_runs.loc[meta_dataset,
                                                 meta_configuration]):
                        try:
                            config = meta_base.get_configuration_from_algorithm_index(
                                meta_configuration)
                            cost = meta_runs.loc[meta_dataset,
                                                 meta_configuration]
                            if read_runtime_data:
                                runtime = meta_durations.loc[
                                    meta_dataset, meta_configuration]
                            else:
                                runtime = 1
                            # TODO read out other status types!
                            meta_runhistory.add(config,
                                                cost,
                                                runtime,
                                                StatusType.SUCCESS,
                                                instance_id=meta_dataset)
                            meta_runs_index += 1
                        except Exception:
                            # TODO maybe add warning
                            pass

                meta_runs_dataset_indices[meta_dataset] = (
                    meta_dataset_start_index, meta_runs_index)
        else:
            if self.acquisition_function == 'EIPS':
                self.logger.critical('Reverting to acquisition function EI!')
                self.acquisition_function = 'EI'
            meta_features_list = []
            meta_features_dict = {}
            metalearning_configurations = []

        self.scenario = AutoMLScenario(self.config_space,
                                       self.total_walltime_limit,
                                       self.func_eval_time_limit,
                                       meta_features_dict, self.tmp_dir,
                                       self.shared_mode)

        types = get_types(self.config_space, self.scenario.feature_array)
        if self.acquisition_function == 'EI':
            rh2EPM = RunHistory2EPM4Cost(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=None,
                                         impute_censored_data=False,
                                         impute_state=None)
            model = RandomForestWithInstances(
                types,
                instance_features=meta_features_list,
                seed=1,
                num_trees=10)
            smac = SMBO(self.scenario, model=model, rng=seed)
        elif self.acquisition_function == 'EIPS':
            rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=None,
                                         impute_censored_data=False,
                                         impute_state=None)
            model = UncorrelatedMultiObjectiveRandomForestWithInstances(
                ['cost', 'runtime'],
                types,
                num_trees=10,
                instance_features=meta_features_list,
                seed=1)
            acquisition_function = EIPS(model)
            smac = SMBO(self.scenario,
                        acquisition_function=acquisition_function,
                        model=model,
                        runhistory2epm=rh2EPM,
                        rng=seed)
        else:
            raise ValueError('Unknown acquisition function value %s!' %
                             self.acquisition_function)

        # Build a runtime model
        # runtime_rf = RandomForestWithInstances(types,
        #                                        instance_features=meta_features_list,
        #                                        seed=1, num_trees=10)
        # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
        #                                      scenario=self.scenario,
        #                                      success_states=None,
        #                                      impute_censored_data=False,
        #                                      impute_state=None)
        # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
        # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
        X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
        # Transform Y_meta on a per-dataset basis
        for meta_dataset in meta_runs_dataset_indices:
            start_index, end_index = meta_runs_dataset_indices[meta_dataset]
            end_index += 1  # Python indexing
            Y_meta[start_index:end_index, 0][
                Y_meta[start_index:end_index, 0] > 2.0] = 2.0
            dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
            Y_meta[start_index:end_index,
                   0] = 1 - ((1. - Y_meta[start_index:end_index, 0]) /
                             (1. - dataset_minimum))
            Y_meta[start_index:end_index, 0][
                Y_meta[start_index:end_index, 0] > 2] = 2

        # == first, evaluate all metalearning and default configurations
        for i, next_config in enumerate(
            ([default_cfg] + metalearning_configurations)):
            # Do not evaluate default configurations more than once
            if i >= len([default_cfg]) and next_config in [default_cfg]:
                continue

            config_name = 'meta-learning' if i >= len([default_cfg]) \
                else 'default'

            self.logger.info(
                "Starting to evaluate %d. configuration "
                "(%s configuration) with time limit %ds.", num_run,
                config_name, self.func_eval_time_limit)
            self.logger.info(next_config)
            self.reset_data_manager()
            info = eval_with_limits(self.datamanager, self.tmp_dir,
                                    next_config, seed, num_run,
                                    self.resampling_strategy,
                                    self.resampling_strategy_args,
                                    self.memory_limit,
                                    self.func_eval_time_limit)
            (duration, result, _, additional_run_info, status) = info
            run_history.add(config=next_config,
                            cost=result,
                            time=duration,
                            status=status,
                            instance_id=instance_id,
                            seed=seed)
            run_history.update_cost(next_config, result)
            self.logger.info(
                "Finished evaluating %d. configuration. "
                "Duration %f; loss %f; status %s; additional run "
                "info: %s ", num_run, duration, result, str(status),
                additional_run_info)
            num_run += 1
            if smac.incumbent is None:
                smac.incumbent = next_config
            elif result < run_history.get_cost(smac.incumbent):
                smac.incumbent = next_config

            if self.scenario.shared_model:
                pSMAC.write(run_history=run_history,
                            output_directory=self.scenario.output_dir,
                            num_run=self.seed)

        # == after metalearning run SMAC loop
        smac.runhistory = run_history
        smac_iter = 0
        finished = False
        while not finished:
            if self.scenario.shared_model:
                pSMAC.read(run_history=run_history,
                           output_directory=self.scenario.output_dir,
                           configuration_space=self.config_space,
                           logger=self.logger)

            next_configs = []
            time_for_choose_next = -1
            try:
                X_cfg, Y_cfg = rh2EPM.transform(run_history)

                if not run_history.empty():
                    # Update costs by normalization
                    dataset_minimum = np.min(Y_cfg[:, 0])
                    Y_cfg[:, 0] = 1 - ((1. - Y_cfg[:, 0]) /
                                       (1. - dataset_minimum))
                    Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2

                if len(X_meta) > 0 and len(X_cfg) > 0:
                    pass
                    #X_cfg = np.concatenate((X_meta, X_cfg))
                    #Y_cfg = np.concatenate((Y_meta, Y_cfg))
                elif len(X_meta) > 0:
                    X_cfg = X_meta.copy()
                    Y_cfg = Y_meta.copy()
                elif len(X_cfg) > 0:
                    X_cfg = X_cfg.copy()
                    Y_cfg = Y_cfg.copy()
                else:
                    raise ValueError(
                        'No training data for SMAC random forest!')

                self.logger.info('Using %d training points for SMAC.' %
                                 X_cfg.shape[0])
                choose_next_start_time = time.time()
                next_configs_tmp = smac.choose_next(
                    X_cfg,
                    Y_cfg,
                    num_interleaved_random=110,
                    num_configurations_by_local_search=10,
                    num_configurations_by_random_search_sorted=100)
                time_for_choose_next = time.time() - choose_next_start_time
                self.logger.info('Used %g seconds to find next '
                                 'configurations' % (time_for_choose_next))
                next_configs.extend(next_configs_tmp)
            # TODO put Exception here!
            except Exception as e:
                self.logger.error(e)
                self.logger.error("Error in getting next configurations "
                                  "with SMAC. Using random configuration!")
                next_config = self.config_space.sample_configuration()
                next_configs.append(next_config)

            models_fitted_this_iteration = 0
            start_time_this_iteration = time.time()
            for next_config in next_configs:
                x_runtime = impute_inactive_values(next_config)
                x_runtime = impute_inactive_values(x_runtime).get_array()
                # predicted_runtime = runtime_rf.predict_marginalized_over_instances(
                #     x_runtime.reshape((1, -1)))
                # predicted_runtime = np.exp(predicted_runtime[0][0][0]) - 1

                self.logger.info(
                    "Starting to evaluate %d. configuration (from "
                    "SMAC) with time limit %ds.", num_run,
                    self.func_eval_time_limit)
                self.logger.info(next_config)
                self.reset_data_manager()
                info = eval_with_limits(self.datamanager, self.tmp_dir,
                                        next_config, seed, num_run,
                                        self.resampling_strategy,
                                        self.resampling_strategy_args,
                                        self.memory_limit,
                                        self.func_eval_time_limit)
                (duration, result, _, additional_run_info, status) = info
                run_history.add(config=next_config,
                                cost=result,
                                time=duration,
                                status=status,
                                instance_id=instance_id,
                                seed=seed)
                run_history.update_cost(next_config, result)

                #self.logger.info('Predicted runtime %g, true runtime %g',
                #                 predicted_runtime, duration)

                # TODO add unittest to make sure everything works fine and
                # this does not get outdated!
                if smac.incumbent is None:
                    smac.incumbent = next_config
                elif result < run_history.get_cost(smac.incumbent):
                    smac.incumbent = next_config

                self.logger.info(
                    "Finished evaluating %d. configuration. "
                    "Duration: %f; loss: %f; status %s; additional "
                    "run info: %s ", num_run, duration, result, str(status),
                    additional_run_info)
                smac_iter += 1
                num_run += 1

                models_fitted_this_iteration += 1
                time_used_this_iteration = (
                    time.time() - start_time_this_iteration)
                if models_fitted_this_iteration >= 2 and \
                        time_for_choose_next > 0 and \
                        time_used_this_iteration > time_for_choose_next:
                    break
                elif time_for_choose_next <= 0 and \
                        models_fitted_this_iteration >= 1:
                    break
                elif models_fitted_this_iteration >= 50:
                    break

                if max_iters is not None:
                    # Stop once the iteration budget is exhausted.
                    finished = (smac_iter >= max_iters)

            if self.scenario.shared_model:
                pSMAC.write(run_history=run_history,
                            output_directory=self.scenario.output_dir,
                            num_run=self.seed)
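
The per-dataset transformation applied to Y_meta above clips losses at 2 and rescales them so that the dataset's best observed loss maps to 0, with worse losses growing toward the clip value (this assumes the minimum loss is below 1 so the denominator stays positive). A standalone sketch with made-up losses:

import numpy as np

y = np.array([0.2, 0.5, 3.0])    # hypothetical losses for one dataset
y[y > 2.0] = 2.0                 # clip overly large losses
y_min = y.min()
y = 1 - (1. - y) / (1. - y_min)  # best loss -> 0, worse losses grow
y[y > 2] = 2                     # clip again after rescaling
print(y)                         # [0.    0.375 2.   ]
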
Beispiel #14
0
    def run_smbo(self, max_iters=1000):
        global evaluator

        # == first things first: load the datamanager
        self.reset_data_manager()
        
        # == Initialize SMBO stuff
        # first create a scenario
        seed = self.seed # TODO
        num_params = len(self.config_space.get_hyperparameters())
        # allocate a run history
        run_history = RunHistory()
        meta_runhistory = RunHistory()
        meta_runs_dataset_indices = {}
        num_run = self.start_num_run
        instance_id = self.dataset_name + SENTINEL

        # == Train on subset
        #    before doing anything, let us run the default_cfg
        #    on a subset of the available data to ensure that
        #    we at least have some models
        #    we will try three different ratios of decreasing magnitude
        #    in the hope that at least on the last one we will be able
        #    to get a model
        n_data = self.datamanager.data['X_train'].shape[0]
        subset_ratio = 10000. / n_data
        if subset_ratio >= 0.5:
            subset_ratio = 0.33
            subset_ratios = [subset_ratio, subset_ratio * 0.10]
        else:
            subset_ratios = [subset_ratio, 500. / n_data]
        self.logger.info("Training default configurations on a subset of "
                         "%d/%d data points." %
                         (int(n_data * subset_ratio), n_data))

        # the time limit for these function evaluations is rigorously
        # set to only 1/2 of a full function evaluation
        subset_time_limit = max(5, int(self.func_eval_time_limit / 2))
        # the configs we want to run on the data subset are:
        # 1) the default configs
        # 2) a set of configs we selected for training on a subset
        subset_configs = [self.config_space.get_default_configuration()] \
                          + self.collect_additional_subset_defaults()
        subset_config_succesful = [False] * len(subset_configs)
        for subset_config_id, next_config in enumerate(subset_configs):
            for i, ratio in enumerate(subset_ratios):
                self.reset_data_manager()
                n_data_subsample = int(n_data * ratio)

                # run the config, but throw away the result afterwards
                # since this cfg was evaluated only on a subset
                # and we don't want  to confuse SMAC
                self.logger.info("Starting to evaluate %d on SUBSET "
                                 "with size %d and time limit %ds.",
                                 num_run, n_data_subsample,
                                 subset_time_limit)
                self.logger.info(next_config)
                _info = eval_with_limits(
                    self.datamanager, self.tmp_dir, next_config,
                    seed, num_run,
                    self.resampling_strategy,
                    self.resampling_strategy_args,
                    self.memory_limit,
                    subset_time_limit, n_data_subsample)
                (duration, result, _, additional_run_info, status) = _info
                self.logger.info("Finished evaluating %d. configuration on SUBSET. "
                                 "Duration %f; loss %f; status %s; additional run "
                                 "info: %s ", num_run, duration, result,
                                 str(status), additional_run_info)

                num_run += 1
                if i < len(subset_ratios) - 1:
                    if status != StatusType.SUCCESS:
                        # Do not increase num_run here, because we will try
                        # the same configuration with less data
                        self.logger.info("A CONFIG did not finish "
                                         " for subset ratio %f -> going smaller",
                                         ratio)
                        continue
                    else:
                        self.logger.info("Finished SUBSET training sucessfully "
                                         "with ratio %f", ratio)
                        subset_config_succesful[subset_config_id] = True
                        break
                else:
                    if status != StatusType.SUCCESS:
                        self.logger.info("A CONFIG did not finish "
                                         " for subset ratio %f.",
                                         ratio)
                        continue
                    else:
                        self.logger.info("Finished SUBSET training sucessfully "
                                         "with ratio %f", ratio)
                        subset_config_succesful[subset_config_id] = True
                        break

        # Use the first non-failing configuration from the subsets as the new
        #  default configuration -> this guards us against the random forest
        # failing on large, sparse datasets
        default_cfg = None
        for subset_config_id, next_config in enumerate(subset_configs):
            if subset_config_succesful[subset_config_id]:
                default_cfg = next_config
                break
        if default_cfg is None:
            default_cfg = self.config_space.get_default_configuration()

        # == METALEARNING suggestions
        # we start by evaluating the defaults on the full dataset again
        # and add the suggestions from metalearning behind it

        if self.metadata_directory is None:
            metalearning_directory = os.path.dirname(
                autosklearn.metalearning.__file__)
            # There is no multilabel data in OpenML
            if self.task == MULTILABEL_CLASSIFICATION:
                meta_task = BINARY_CLASSIFICATION
            else:
                meta_task = self.task
            metadata_directory = os.path.join(
                metalearning_directory, 'files',
                '%s_%s_%s' % (METRIC_TO_STRING[self.metric],
                              TASK_TYPES_TO_STRING[meta_task],
                              'sparse' if self.datamanager.info['is_sparse']
                              else 'dense'))
            self.metadata_directory = metadata_directory

        self.logger.info('Metadata directory: %s', self.metadata_directory)
        meta_base = MetaBase(self.config_space, self.metadata_directory)

        metafeature_calculation_time_limit = int(
            self.total_walltime_limit / 4)
        metafeature_calculation_start_time = time.time()
        meta_features = self._calculate_metafeatures_with_limits(
            metafeature_calculation_time_limit)
        metafeature_calculation_end_time = time.time()
        metafeature_calculation_time_limit = \
            metafeature_calculation_time_limit - (
            metafeature_calculation_end_time -
            metafeature_calculation_start_time)

        if metafeature_calculation_time_limit < 1:
            self.logger.warning('Time limit for metafeature calculation less '
                                'than 1 seconds (%f). Skipping calculation '
                                'of metafeatures for encoded dataset.',
                                metafeature_calculation_time_limit)
            meta_features_encoded = None
        else:
            self.datamanager.perform1HotEncoding()
            meta_features_encoded = \
                self._calculate_metafeatures_encoded_with_limits(
                    metafeature_calculation_time_limit)

        # In case there is a problem calculating the encoded meta-features
        if meta_features is None:
            if meta_features_encoded is not None:
                meta_features = meta_features_encoded
        else:
            if meta_features_encoded is not None:
                meta_features.metafeature_values.update(
                    meta_features_encoded.metafeature_values)

        if meta_features is not None:
            meta_base.add_dataset(instance_id, meta_features)
            # Do mean imputation of the meta-features - should be done specific
            # for each prediction model!
            all_metafeatures = meta_base.get_metafeatures(
                features=list(meta_features.keys()))
            all_metafeatures.fillna(all_metafeatures.mean(), inplace=True)

            metalearning_configurations = self.collect_metalearning_suggestions(
                meta_base)
            if metalearning_configurations is None:
                metalearning_configurations = []
            self.reset_data_manager()

            self.logger.info('%s', meta_features)

            # Convert meta-features into a dictionary because the scenario
            # expects a dictionary
            meta_features_dict = {}
            for dataset, series in all_metafeatures.iterrows():
                meta_features_dict[dataset] = series.values
            meta_features_list = []
            for meta_feature_name in all_metafeatures.columns:
                meta_features_list.append(meta_features[meta_feature_name].value)
            meta_features_list = np.array(meta_features_list).reshape((1, -1))
            self.logger.info(list(meta_features_dict.keys()))

            meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
            meta_runs_index = 0
            try:
                meta_durations = meta_base.get_all_runs('runtime')
                read_runtime_data = True
            except KeyError:
                read_runtime_data = False
                self.logger.critical('Cannot read runtime data.')
                if self.acquisition_function == 'EIPS':
                    self.logger.critical('Reverting to acquisition function EI!')
                    self.acquisition_function = 'EI'

            for meta_dataset in meta_runs.index:
                meta_dataset_start_index = meta_runs_index
                for meta_configuration in meta_runs.columns:
                    if np.isfinite(meta_runs.loc[meta_dataset, meta_configuration]):
                        try:
                            config = meta_base.get_configuration_from_algorithm_index(
                                meta_configuration)
                            cost = meta_runs.loc[meta_dataset, meta_configuration]
                            if read_runtime_data:
                                runtime = meta_durations.loc[meta_dataset,
                                                             meta_configuration]
                            else:
                                runtime = 1
                            # TODO read out other status types!
                            meta_runhistory.add(config, cost, runtime,
                                                StatusType.SUCCESS,
                                                instance_id=meta_dataset)
                            meta_runs_index += 1
                        except:
                            # TODO maybe add warning
                            pass

                meta_runs_dataset_indices[meta_dataset] = (
                    meta_dataset_start_index, meta_runs_index)
        else:
            if self.acquisition_function == 'EIPS':
                self.logger.critical('Reverting to acquisition function EI!')
                self.acquisition_function = 'EI'
            meta_features_list = []
            meta_features_dict = {}
            metalearning_configurations = []

        self.scenario = AutoMLScenario(self.config_space,
                                       self.total_walltime_limit,
                                       self.func_eval_time_limit,
                                       meta_features_dict,
                                       self.tmp_dir,
                                       self.shared_mode)

        types = get_types(self.config_space, self.scenario.feature_array)
        if self.acquisition_function == 'EI':
            rh2EPM = RunHistory2EPM4Cost(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=None,
                                         impute_censored_data=False,
                                         impute_state=None)
            model = RandomForestWithInstances(types,
                                              instance_features=meta_features_list,
                                              seed=1, num_trees=10)
            smac = SMBO(self.scenario, model=model,
                        rng=seed)
        elif self.acquisition_function == 'EIPS':
            rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=None,
                                         impute_censored_data=False,
                                         impute_state=None)
            model = UncorrelatedMultiObjectiveRandomForestWithInstances(
                ['cost', 'runtime'], types, num_trees = 10,
                instance_features=meta_features_list, seed=1)
            acquisition_function = EIPS(model)
            smac = SMBO(self.scenario,
                        acquisition_function=acquisition_function,
                        model=model, runhistory2epm=rh2EPM, rng=seed)
        else:
            raise ValueError('Unknown acquisition function value %s!' %
                             self.acquisition_function)

        # Build a runtime model
        # runtime_rf = RandomForestWithInstances(types,
        #                                        instance_features=meta_features_list,
        #                                        seed=1, num_trees=10)
        # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
        #                                      scenario=self.scenario,
        #                                      success_states=None,
        #                                      impute_censored_data=False,
        #                                      impute_state=None)
        # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
        # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
        X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
        # Transform Y_meta on a per-dataset base
        for meta_dataset in meta_runs_dataset_indices:
            start_index, end_index = meta_runs_dataset_indices[meta_dataset]
            end_index += 1  # Python indexing
            Y_meta[start_index:end_index, 0]\
                [Y_meta[start_index:end_index, 0] >2.0] =  2.0
            dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
            Y_meta[start_index:end_index, 0] = 1 - (
                (1. - Y_meta[start_index:end_index, 0]) /
                (1. - dataset_minimum))
            Y_meta[start_index:end_index, 0]\
                  [Y_meta[start_index:end_index, 0] > 2] = 2

        # == first, evaluate all metelearning and default configurations
        for i, next_config in enumerate(([default_cfg] +
                                          metalearning_configurations)):
            # Do not evaluate default configurations more than once
            if i >= len([default_cfg]) and next_config in [default_cfg]:
                continue

            config_name = 'meta-learning' if i >= len([default_cfg]) \
                else 'default'

            self.logger.info("Starting to evaluate %d. configuration "
                             "(%s configuration) with time limit %ds.",
                             num_run, config_name, self.func_eval_time_limit)
            self.logger.info(next_config)
            self.reset_data_manager()
            info = eval_with_limits(self.datamanager, self.tmp_dir, next_config,
                                    seed, num_run,
                                    self.resampling_strategy,
                                    self.resampling_strategy_args,
                                    self.memory_limit,
                                    self.func_eval_time_limit)
            (duration, result, _, additional_run_info, status) = info
            run_history.add(config=next_config, cost=result,
                            time=duration , status=status,
                            instance_id=instance_id, seed=seed)
            run_history.update_cost(next_config, result)
            self.logger.info("Finished evaluating %d. configuration. "
                             "Duration %f; loss %f; status %s; additional run "
                             "info: %s ", num_run, duration, result,
                             str(status), additional_run_info)
            num_run += 1
            if smac.incumbent is None:
                smac.incumbent = next_config
            elif result < run_history.get_cost(smac.incumbent):
                smac.incumbent = next_config

            if self.scenario.shared_model:
                pSMAC.write(run_history=run_history,
                            output_directory=self.scenario.output_dir,
                            num_run=self.seed)

        # == after metalearning run SMAC loop
        smac.runhistory = run_history
        smac_iter = 0
        finished = False
        while not finished:
            if self.scenario.shared_model:
                pSMAC.read(run_history=run_history,
                           output_directory=self.scenario.output_dir,
                           configuration_space=self.config_space,
                           logger=self.logger)

            next_configs = []
            time_for_choose_next = -1
            try:
                X_cfg, Y_cfg = rh2EPM.transform(run_history)

                if not run_history.empty():
                    # Update costs by normalization
                    dataset_minimum = np.min(Y_cfg[:, 0])
                    Y_cfg[:, 0] = 1 - ((1. - Y_cfg[:, 0]) /
                                       (1. - dataset_minimum))
                    Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2

                if len(X_meta) > 0 and len(X_cfg) > 0:
                    # NOTE: meta-data is currently not added to the training data
                    pass
                    # X_cfg = np.concatenate((X_meta, X_cfg))
                    # Y_cfg = np.concatenate((Y_meta, Y_cfg))
                elif len(X_meta) > 0:
                    X_cfg = X_meta.copy()
                    Y_cfg = Y_meta.copy()
                elif len(X_cfg) > 0:
                    X_cfg = X_cfg.copy()
                    Y_cfg = Y_cfg.copy()
                else:
                    raise ValueError('No training data for SMAC random forest!')

                self.logger.info('Using %d training points for SMAC.' %
                                 X_cfg.shape[0])
                choose_next_start_time = time.time()
                next_configs_tmp = smac.choose_next(X_cfg, Y_cfg,
                                                    num_interleaved_random=110,
                                                    num_configurations_by_local_search=10,
                                                    num_configurations_by_random_search_sorted=100)
                time_for_choose_next = time.time() - choose_next_start_time
                self.logger.info('Used %g seconds to find next '
                                 'configurations' % (time_for_choose_next))
                next_configs.extend(next_configs_tmp)
            # TODO: catch more specific exception types here!
            except Exception as e:
                self.logger.error(e)
                self.logger.error("Error in getting next configurations "
                                  "with SMAC. Using random configuration!")
                next_config = self.config_space.sample_configuration()
                next_configs.append(next_config)

            models_fitted_this_iteration = 0
            start_time_this_iteration = time.time()
            for next_config in next_configs:
                x_runtime = impute_inactive_values(next_config)
                x_runtime = impute_inactive_values(x_runtime).get_array()
                # predicted_runtime = runtime_rf.predict_marginalized_over_instances(
                #     x_runtime.reshape((1, -1)))
                # predicted_runtime = np.exp(predicted_runtime[0][0][0]) - 1

                self.logger.info("Starting to evaluate %d. configuration (from "
                                 "SMAC) with time limit %ds.", num_run,
                                 self.func_eval_time_limit)
                self.logger.info(next_config)
                self.reset_data_manager()
                info = eval_with_limits(self.datamanager, self.tmp_dir, next_config,
                                        seed, num_run,
                                        self.resampling_strategy,
                                        self.resampling_strategy_args,
                                        self.memory_limit,
                                        self.func_eval_time_limit)
                (duration, result, _, additional_run_info, status) = info
                run_history.add(config=next_config, cost=result,
                                time=duration, status=status,
                                instance_id=instance_id, seed=seed)
                run_history.update_cost(next_config, result)

                #self.logger.info('Predicted runtime %g, true runtime %g',
                #                 predicted_runtime, duration)

                # TODO add unittest to make sure everything works fine and
                # this does not get outdated!
                if smac.incumbent is None:
                    smac.incumbent = next_config
                elif result < run_history.get_cost(smac.incumbent):
                    smac.incumbent = next_config
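                # (the incumbent is replaced as soon as a single new result
                # undercuts the incumbent's aggregated cost in the runhistory)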

                self.logger.info("Finished evaluating %d. configuration. "
                                 "Duration: %f; loss: %f; status %s; additional "
                                 "run info: %s ", num_run, duration, result,
                                 str(status), additional_run_info)
                smac_iter += 1
                num_run += 1

                models_fitted_this_iteration += 1
                time_used_this_iteration = time.time() - start_time_this_iteration
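                # Break heuristic: evaluate at least two configurations per
                # SMAC iteration, but stop once evaluating has taken longer
                # than choosing did; if choose_next failed (random fallback,
                # time_for_choose_next <= 0) a single evaluation suffices;
                # never evaluate more than 50 configurations per iteration.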
                if models_fitted_this_iteration >= 2 and \
                        time_for_choose_next > 0 and \
                        time_used_this_iteration > time_for_choose_next:
                    break
                elif time_for_choose_next <= 0 and \
                        models_fitted_this_iteration >= 1:
                    break
                elif models_fitted_this_iteration >= 50:
                    break

                if max_iters is not None:
                    finished = (smac_iter >= max_iters)

            if self.scenario.shared_model:
                pSMAC.write(run_history=run_history,
                            output_directory=self.scenario.output_dir,
                            num_run=self.seed)
Example #15
global_original_rh = RunHistory(average_cost)
global_validated_rh = RunHistory(average_cost)
global_epm_rh = RunHistory(average_cost)

for run in runs:
    global_original_rh.update(run.original_runhistory,
                              origin=DataOrigin.INTERNAL)
    global_validated_rh.update(run.original_runhistory,
                               origin=DataOrigin.INTERNAL)
    if run.validated_runhistory:
        global_validated_rh.update(run.validated_runhistory,
                                   origin=DataOrigin.EXTERNAL_SAME_INSTANCES)
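
# Note: DataOrigin.INTERNAL marks runs produced by the optimizer itself, while
# DataOrigin.EXTERNAL_SAME_INSTANCES marks runs added from outside (here the
# validation runs) that were executed on the same instances.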

global_epm_rh.update(global_validated_rh)
runs = sorted(runs,
              key=lambda run: global_epm_rh.get_cost(run.solver.incumbent))


class TestRunhistory(unittest.TestCase):
    def test_classification(self):
        """Function to test, if random and local runhistory created correctly"""
        # combined = help.combine_runhistories(runs)
        random, local = help.create_random_runhistories(global_original_rh)
        random = str(random.data)
        result = "OrderedDict([(RunKey(config_id=1, instance_id=None, seed=0), RunValue(cost=15.336866, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=2, instance_id=None, seed=0), RunValue(cost=6.042356, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=3, instance_id=None, seed=0), RunValue(cost=76.191931, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=4, instance_id=None, seed=0), RunValue(cost=2.102303, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=5, instance_id=None, seed=0), RunValue(cost=11.997325, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=6, instance_id=None, seed=0), RunValue(cost=27.718052, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=7, instance_id=None, seed=0), RunValue(cost=123.166119, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=8, instance_id=None, seed=0), RunValue(cost=13.556812, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=9, instance_id=None, seed=0), RunValue(cost=3.265477, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=10, instance_id=None, seed=0), RunValue(cost=20.514643, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=11, instance_id=None, seed=0), RunValue(cost=2.522322, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=12, instance_id=None, seed=0), RunValue(cost=20.305772, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=13, instance_id=None, seed=0), RunValue(cost=11.241301, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=14, instance_id=None, seed=0), RunValue(cost=91.710919, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=15, instance_id=None, seed=0), RunValue(cost=20.195214, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=16, instance_id=None, seed=0), RunValue(cost=188.443026, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=17, instance_id=None, seed=0), RunValue(cost=82.295484, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=18, instance_id=None, seed=0), RunValue(cost=31.528635, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=19, instance_id=None, seed=0), RunValue(cost=77.229825, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=20, instance_id=None, seed=0), RunValue(cost=97.872458, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=21, instance_id=None, seed=0), RunValue(cost=140.605172, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=22, instance_id=None, seed=0), RunValue(cost=18.810533, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=23, instance_id=None, seed=0), RunValue(cost=0.933445, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=24, instance_id=None, seed=0), RunValue(cost=14.273374, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={})), (RunKey(config_id=25, instance_id=None, seed=0), RunValue(cost=37.695976, time=-1.0, status=<StatusType.SUCCESS: 1>, additional_info={}))])"
        # self.assertEqual(random, result)

    def test_combination(self):
        """Test if the right combined runhistory is created using random and local runhistory"""
        # combine_random_local
        pass
Example #16
    def _plot_parallel_coordinates(
        self,
        original_rh: RunHistory,
        validated_rh: RunHistory,
        validator: Validator,
        scenario: Scenario,
        default: Configuration,
        incumbent: Configuration,
        param_imp: Union[None, Dict[str, float]],
        output_dir: str,
        cs: ConfigurationSpace,
        runtime: bool = False,
    ):
        """
        Parameters
        ----------
        original_rh: RunHistory
            runhistory that should contain only runs that were executed during search
        validated_rh: RunHistory
            runhistory that may contain as many runs as possible, including
            external runs; this runhistory will be used to build the EPM
        validator: Validator
            validator to be used to estimate costs for configurations
        scenario: Scenario
            scenario object to take instances from
        default, incumbent: Configuration
            default and incumbent configurations; they will always be displayed
        param_imp: Union[None, Dict[str, float]]
            if given, maps parameter-names to importance
        output_dir: str
            output directory for plots
        cs: ConfigurationSpace
            parameter configuration space to be visualized
        runtime: boolean
            if True, costs are runtimes and are plotted on a log scale
        """
        # Sort parameters by importance, if possible (otherwise fall back to the first evaluated parameter-importance method)
        method, importance = "", {}
        if self.pc_sort_by == 'all':
            self.logger.debug("Sorting by average importance")
            method = 'average'
            for m, i in param_imp.items():
                if i:
                    for p, imp in i.items():
                        if p in importance:
                            importance[p].append(imp)
                        else:
                            importance[p] = [imp]
            importance = {k: sum(v) / len(v) for k, v in importance.items()}
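            # e.g. param_imp = {'ablation': {'x': 0.6}, 'fanova': {'x': 0.2}}
            # (made-up numbers) yields importance == {'x': 0.4}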
        elif self.pc_sort_by in param_imp:
            method, importance = self.pc_sort_by, param_imp[self.pc_sort_by]
        else:
            self.logger.debug("%s not evaluated.. choosing at random from: %s",
                              self.pc_sort_by, str(list(param_imp.keys())))
            for m, i in param_imp.items():
                if i:
                    method, importance = m, i
                    self.logger.debug("Chose %s", method)
                    break

        hp_names = sorted(cs.get_hyperparameter_names(),
                          key=lambda x: importance.get(x, 0),
                          reverse=True)
        self.logger.debug("Sorted hp's by method \'%s\': %s", method,
                          str(hp_names))

        # To be set
        self.plots = []

        # Define set of configurations (limiting to max and choosing most interesting ones)
        all_configs = original_rh.get_all_configs()
        # Maximum total number of runs considered for the EPM; this bounds the
        # possible number of configurations
        max_runs_epm = self.max_runs_epm
        max_configs = int(
            max_runs_epm /
            (len(scenario.train_insts) + len(scenario.test_insts)))
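        # e.g. max_runs_epm = 300000 with 100 train and 50 test instances
        # (made-up numbers) gives max_configs = 2000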
        if len(all_configs) > max_configs:
            self.logger.debug(
                "Limiting number of configs to train epm from %d to %d (based on max runs %d) and choosing "
                "the ones with the most runs (for parallel coordinates)",
                len(all_configs), max_configs, max_runs_epm)
            all_configs = sorted(
                all_configs,
                key=lambda c: len(original_rh.get_runs_for_config(c)
                                  ))[:max_configs]
            if default not in all_configs:
                all_configs = [default] + all_configs
            if incumbent not in all_configs:
                all_configs.append(incumbent)

        # Get costs for those configurations
        epm_rh = RunHistory(average_cost)
        epm_rh.update(validated_rh)
        if scenario.feature_dict:  # if instances are available
            epm_rh.update(
                timing(validator.validate_epm)(all_configs,
                                               'train+test',
                                               1,
                                               runhistory=validated_rh))
        config_to_cost = {c: epm_rh.get_cost(c) for c in all_configs}

        pcp = ParallelCoordinatesPlotter(config_to_cost, output_dir, cs,
                                         runtime)

        try:
            plots = [
                pcp.plot_n_configs(
                    self.n_configs,
                    self.get_params(self.params, importance, hp_names))
            ]
            self.logger.debug("Paths to plot(s): %s", str(plots))
            return {'figure': plots}
        except ValueError as err:
            self.logger.debug("Error: %s", str(err))
            return {'else': str(err)}
Example #17
class TestIntensify(unittest.TestCase):
    def setUp(self):
        unittest.TestCase.setUp(self)

        self.rh = RunHistory()
        self.cs = get_config_space()
        self.config1 = Configuration(self.cs, values={'a': 0, 'b': 100})
        self.config2 = Configuration(self.cs, values={'a': 100, 'b': 0})
        self.config3 = Configuration(self.cs, values={'a': 100, 'b': 100})

        self.scen = Scenario({
            "cutoff_time": 2,
            'cs': self.cs,
            "run_obj": 'runtime',
            "output_dir": ''
        })
        self.stats = Stats(scenario=self.scen)
        self.stats.start_timing()

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

    def test_race_challenger(self):
        """
           test _race_challenger without adaptive capping
        """
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  run_obj_time=False)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)
        intensifier.N = 1

        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh)

        self.assertEqual(inc, self.config2)
        self.assertEqual(intensifier.num_run, 1)
        self.assertEqual(intensifier.num_chall_run, 1)

    def test_race_challenger_2(self):
        """
           test _race_challenger with adaptive capping
        """
        def target(x):
            time.sleep(1.5)
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats, run_obj="runtime")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=.001,
                    time=0.001,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=12345,
                    additional_info=None)
        intensifier.N = 1

        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        inc = intensifier._race_challenger(
            challenger=self.config2,
            incumbent=self.config1,
            run_history=self.rh,
        )

        self.assertEqual(inc, self.config1)
        self.assertEqual(intensifier.num_run, 1)
        self.assertEqual(intensifier.num_chall_run, 1)

    def test_race_challenger_3(self):
        """
           test _race_challenger with adaptive capping on a previously capped configuration
        """
        def target(config: Configuration, seed: int, instance: str):
            if instance == 1:
                time.sleep(2.1)
            else:
                time.sleep(0.6)
            return (config['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="runtime",
                                par_factor=1)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  cutoff=2,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=0.5,
                    time=.5,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=12345,
                    additional_info=None)

        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config3], chooser=None)
        inc = intensifier._race_challenger(
            challenger=config,
            incumbent=self.config1,
            run_history=self.rh,
        )
        self.assertEqual(inc, self.config1)

        # further run for incumbent
        self.rh.add(config=self.config1,
                    cost=2,
                    time=2,
                    status=StatusType.TIMEOUT,
                    instance_id=2,
                    seed=12345,
                    additional_info=None)

        # give config2 a second chance - now it should run on both instances

        # run on instance 1
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config3], chooser=None)
        inc = intensifier._race_challenger(
            challenger=config,
            incumbent=self.config1,
            run_history=self.rh,
        )

        # run on instance 2
        config, _ = intensifier.get_next_challenger(challengers=[self.config3],
                                                    chooser=None)
        self.assertEqual(config, self.config2)
        self.assertTrue(intensifier.continue_challenger)

        inc = intensifier._race_challenger(
            challenger=config,
            incumbent=self.config1,
            run_history=self.rh,
        )

        # the incumbent should still be config1 because
        # config2 should get on inst 1 a full timeout
        # such that c(config1) = 1.25 and c(config2) close to 1.3
        self.assertEqual(inc, self.config1)
        # the capped run should not be counted in num_runs_per_config
        self.assertAlmostEqual(self.rh.num_runs_per_config[2], 2)
        self.assertFalse(intensifier.continue_challenger)

        self.assertEqual(intensifier.num_run, 3)
        self.assertEqual(intensifier.num_chall_run, 3)

    def test_race_challenger_large(self):
        """
           test _race_challenger using solution_quality
        """
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(10)),
                                  run_obj_time=False,
                                  deterministic=True)

        for i in range(10):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=12345,
                        additional_info=None)

        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # tie on first instances and then challenger should always win
        # and be returned as inc
        while True:
            config, _ = intensifier.get_next_challenger(
                challengers=[self.config2, self.config3], chooser=None)
            inc = intensifier._race_challenger(
                challenger=config,
                incumbent=self.config1,
                run_history=self.rh,
            )

            # stop when challenger evaluation is over
            if not intensifier.stage == IntensifierStage.RUN_CHALLENGER:
                break

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1)

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2,
                                           only_max_observed_budget=True)
        self.assertEqual(len(runs), 10)

        self.assertEqual(intensifier.num_run, 10)
        self.assertEqual(intensifier.num_chall_run, 10)

    def test_race_challenger_large_blocked_seed(self):
        """
           test _race_challenger whether seeds are blocked for challenger runs
        """
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(10)),
                                  run_obj_time=False,
                                  deterministic=False)

        for i in range(10):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)

        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # tie on first instances and then challenger should always win
        # and be returned as inc
        while True:
            config, _ = intensifier.get_next_challenger(
                challengers=[self.config2, self.config3], chooser=None)
            inc = intensifier._race_challenger(
                challenger=config,
                incumbent=self.config1,
                run_history=self.rh,
            )

            # stop when challenger evaluation is over
            if not intensifier.stage == IntensifierStage.RUN_CHALLENGER:
                break

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1)

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2,
                                           only_max_observed_budget=True)
        self.assertEqual(len(runs), 10)

        seeds = sorted([r.seed for r in runs])
        self.assertEqual(seeds, list(range(10)), seeds)

        self.assertEqual(intensifier.num_run, 10)
        self.assertEqual(intensifier.num_chall_run, 10)

    def test_add_inc_run_det(self):
        """
            test _add_inc_run()
        """
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="solution_quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        # since we assume deterministic=True,
        # the second call should not add any more runs
        # given only one instance
        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        # The following two counters are zero because _next_iteration is
        # triggered by _add_inc_run, as this is the first evaluation of this
        # intensifier
        self.assertEqual(intensifier.num_run, 0)
        self.assertEqual(intensifier.num_chall_run, 0)

    def test_add_inc_run_nondet(self):
        """
            test _add_inc_run()
        """
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                runhistory=self.rh,
                                run_obj="solution_quality")

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1, 2],
                                  deterministic=False)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 2, self.rh.data)
        runs = self.rh.get_runs_for_config(config=self.config1,
                                           only_max_observed_budget=True)
        # exactly one run on each instance
        self.assertIn(1, [runs[0].instance, runs[1].instance])
        self.assertIn(2, [runs[0].instance, runs[1].instance])

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 3, self.rh.data)

        self.assertEqual(intensifier.num_run, 2)
        self.assertEqual(intensifier.num_chall_run, 0)

    def test_get_next_challenger(self):
        """
            test get_next_challenger()
        """
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)

        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # get a new challenger to evaluate
        config, new = intensifier.get_next_challenger(
            challengers=[self.config1, self.config2], chooser=None)

        self.assertEqual(config, self.config1, intensifier.current_challenger)
        self.assertEqual(intensifier._chall_indx, 1)
        self.assertEqual(intensifier.N, 1)
        self.assertTrue(new)

        # when already evaluating a challenger, return the same challenger
        intensifier.to_run = [(1, 1, 0)]
        config, new = intensifier.get_next_challenger(
            challengers=[self.config2], chooser=None)
        self.assertEqual(config, self.config1, intensifier.current_challenger)
        self.assertEqual(intensifier._chall_indx, 1)
        self.assertFalse(new)

    def test_generate_challenger(self):
        """
            test generate_challenger()
        """
        # test get generator from a list of challengers
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=None,
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)

        gen = intensifier._generate_challengers(
            challengers=[self.config1, self.config2], chooser=None)

        self.assertEqual(next(gen), self.config1)
        self.assertEqual(next(gen), self.config2)
        self.assertRaises(StopIteration, next, gen)

        # test get generator from a chooser - would return only 1 configuration
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=None,
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)
        chooser = SMAC4AC(self.scen, rng=1).solver.epm_chooser

        gen = intensifier._generate_challengers(challengers=None,
                                                chooser=chooser)

        self.assertEqual(next(gen).get_dictionary(), {'a': 24, 'b': 68})
        self.assertRaises(StopIteration, next, gen)

        # when both are none, raise error
        with self.assertRaisesRegex(ValueError,
                                    "No configurations/chooser provided"):
            intensifier._generate_challengers(challengers=None, chooser=None)

    def test_eval_challenger_1(self):
        """
            test eval_challenger() - a complete intensification run with an `always_race_against` configuration
        """
        def target(x):
            if x['a'] == 100:
                time.sleep(1)
            return x['a']

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats, run_obj="runtime")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1, 2],
                                  run_obj_time=True,
                                  cutoff=2,
                                  deterministic=False,
                                  always_race_against=self.config3,
                                  run_limit=1)

        self.assertEqual(intensifier.n_iters, 0)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_FIRST_CONFIG)
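        # Stage machine as exercised by this test: RUN_FIRST_CONFIG ->
        # RUN_INCUMBENT -> RUN_CHALLENGER -> RUN_BASIS (only because
        # `always_race_against` is set) -> RUN_INCUMBENT -> RUN_CHALLENGER ...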

        # intensification iteration #1
        # run first config as incumbent if incumbent is None
        config, _ = intensifier.get_next_challenger(challengers=[self.config2],
                                                    chooser=None)
        self.assertEqual(config, self.config2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_FIRST_CONFIG)
        # eval config 2 (=first run)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=None,
            run_history=self.rh,
        )
        self.assertEqual(inc, self.config2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        self.assertEqual(self.stats.inc_changed, 1)
        self.assertEqual(intensifier.n_iters,
                         1)  # 1 intensification run complete!

        # intensification iteration #2
        # regular intensification begins - run incumbent first
        config, _ = intensifier.get_next_challenger(
            challengers=None,  # don't need a new list here as old one is cont'd
            chooser=None)
        self.assertEqual(config, inc)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )
        self.assertEqual(self.stats.ta_runs, 2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertEqual(self.stats.inc_changed, 1)

        # run challenger now that the incumbent has been executed
        config, _ = intensifier.get_next_challenger(challengers=[self.config1],
                                                    chooser=None)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertEqual(config, self.config1)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )

        # challenger has better performance, but has not been run on all instances yet, so the incumbent stays the same
        self.assertEqual(inc, self.config2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertTrue(intensifier.continue_challenger)

        # run challenger again on the other instance
        config, _ = intensifier.get_next_challenger(
            challengers=None,  # don't need a new list here as old one is cont'd
            chooser=None)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertEqual(config, self.config1)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )

        # challenger is better than the incumbent on both instances, so the incumbent changed
        self.assertEqual(inc, self.config1)
        self.assertEqual(self.stats.inc_changed, 2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_BASIS)
        self.assertFalse(intensifier.continue_challenger)

        # run basis configuration (`always_race_against`)
        config, _ = intensifier.get_next_challenger(
            challengers=None,  # don't need a new list here as old one is cont'd
            chooser=None)
        self.assertEqual(config, self.config3)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_BASIS)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )

        # the basis configuration (config3) is not better than the incumbent, so we can move on
        self.assertEqual(inc, self.config1)
        self.assertEqual(self.stats.inc_changed, 2)
        self.assertEqual(self.stats.ta_runs, 5)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        self.assertEqual(list(self.rh.data.values())[4][2], StatusType.CAPPED)
        self.assertEqual(
            intensifier.n_iters,
            1)  # iteration continues as `min_chall` condition is not met
        self.assertIsInstance(intensifier.configs_to_run,
                              collections.abc.Iterator)
        # no configs should be left at the end
        with self.assertRaises(StopIteration):
            next(intensifier.configs_to_run)

        # intensification continues running incumbent again in same iteration...
        config, _ = intensifier.get_next_challenger(
            challengers=None,  # don't need a new list here as old one is cont'd
            chooser=None)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )

        self.assertEqual(inc, self.config1)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)

        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config1,
                                            only_max_observed_budget=True)), 3)
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config2,
                                            only_max_observed_budget=True)), 2)
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config3,
                                            only_max_observed_budget=True)),
            0)  # capped

    def test_eval_challenger_2(self):
        """
            test eval_challenger() - a complete intensification run without an `always_race_against` configuration
        """
        def target(x):
            return 2 * x['a'] + x['b']

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats, run_obj="quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  run_obj_time=False,
                                  deterministic=True,
                                  always_race_against=None,
                                  run_limit=1)

        self.assertEqual(intensifier.n_iters, 0)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_FIRST_CONFIG)

        # intensification iteration #1
        # run first config as incumbent if incumbent is None
        config, _ = intensifier.get_next_challenger(challengers=[self.config3],
                                                    chooser=None)
        self.assertEqual(config, self.config3)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_FIRST_CONFIG)
        # eval config 3 (=first run)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=None,
            run_history=self.rh,
        )
        self.assertEqual(inc, self.config3)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        self.assertEqual(self.stats.inc_changed, 1)
        self.assertEqual(intensifier.n_iters,
                         1)  # 1 intensification run complete!

        # regular intensification begins - run incumbent
        config, _ = intensifier.get_next_challenger(
            challengers=None,  # since incumbent is run, no configs required
            chooser=None)
        self.assertEqual(config, inc)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )

        # no new TA runs as there are no more instances to run
        self.assertEqual(inc, self.config3)
        self.assertEqual(self.stats.inc_changed, 1)
        self.assertEqual(self.stats.ta_runs, 1)
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config3,
                                            only_max_observed_budget=True)), 1)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)

        # run challenger now that the incumbent has been executed
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config1], chooser=None)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertEqual(config, self.config2)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )

        # challenger has a better performance, so incumbent has changed
        self.assertEqual(inc, self.config2)
        self.assertEqual(self.stats.inc_changed, 2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT
                         )  # since there is no `always_race_against`
        self.assertFalse(intensifier.continue_challenger)
        self.assertEqual(
            intensifier.n_iters,
            1)  # iteration continues as `min_chall` condition is not met

        # intensification continues running incumbent again in same iteration...
        # run incumbent
        config, _ = intensifier.get_next_challenger(
            challengers=None,  # don't need a new list here as old one is cont'd
            chooser=None)
        self.assertEqual(config, self.config2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )

        # run challenger
        config, _ = intensifier.get_next_challenger(
            challengers=None,  # don't need a new list here as old one is cont'd
            chooser=None)
        self.assertEqual(config, self.config1)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )

        self.assertEqual(inc, self.config1)
        self.assertEqual(self.stats.inc_changed, 3)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        self.assertEqual(intensifier.n_iters,
                         2)  # 2 intensification run complete!
        # no configs should be left at the end
        with self.assertRaises(StopIteration):
            next(intensifier.configs_to_run)

        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config1,
                                            only_max_observed_budget=True)), 1)
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config2,
                                            only_max_observed_budget=True)), 1)
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config3,
                                            only_max_observed_budget=True)), 1)

    def test_eval_challenger_3(self):
        """
            test eval_challenger for a resumed SMAC run (first run with incumbent)
        """
        def target(x):
            return x['a']

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats, run_obj="quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  run_obj_time=False,
                                  deterministic=False,
                                  always_race_against=None,
                                  run_limit=1)

        self.assertEqual(intensifier.n_iters, 0)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_FIRST_CONFIG)

        # adding run for incumbent configuration
        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        # intensification - the incumbent will be run, but not in the RUN_FIRST_CONFIG stage
        config, _ = intensifier.get_next_challenger(challengers=[self.config2],
                                                    chooser=None)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=self.config1,
            run_history=self.rh,
        )

        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config1,
                                            only_max_observed_budget=True)), 2)

    def test_no_new_intensification_wo_challenger_run(self):
        """
        This test ensures that no new iteration is started if no challenger run was conducted
        """
        def target(x):
            return 2 * x['a'] + x['b']

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats, run_obj="quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(
            tae_runner=taf,
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            instances=[1],
            run_obj_time=False,
            deterministic=True,
            always_race_against=None,
            run_limit=1,
            min_chall=1,
        )

        self.assertEqual(intensifier.n_iters, 0)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_FIRST_CONFIG)

        config, _ = intensifier.get_next_challenger(challengers=[self.config3],
                                                    chooser=None)
        self.assertEqual(config, self.config3)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_FIRST_CONFIG)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=None,
            run_history=self.rh,
        )
        self.assertEqual(inc, self.config3)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        self.assertEqual(intensifier.n_iters,
                         1)  # 1 intensification run complete!

        # regular intensification begins - run incumbent
        config, _ = intensifier.get_next_challenger(
            challengers=None,  # since incumbent is run, no configs required
            chooser=None)
        self.assertEqual(config, inc)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertEqual(intensifier.n_iters, 1)

        # Check that we don't walk into the next iteration if the challenger is passed again
        config, _ = intensifier.get_next_challenger(challengers=[self.config3],
                                                    chooser=None)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertEqual(intensifier.n_iters, 1)

        intensifier._next_iteration()

        # Add a configuration, then try to execute it afterwards
        self.assertEqual(intensifier.n_iters, 2)
        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=0,
                    additional_info=None)
        intensifier.stage = IntensifierStage.RUN_CHALLENGER
        config, _ = intensifier.get_next_challenger(challengers=[self.config1],
                                                    chooser=None)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )
        self.assertEqual(intensifier.n_iters, 2)
        self.assertEqual(intensifier.num_chall_run, 0)

        # This returns the incumbent config to be evaluated again
        config, _ = intensifier.get_next_challenger(challengers=None,
                                                    chooser=None)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )
        # This doesn't return a config because the array of configs is exhausted
        config, _ = intensifier.get_next_challenger(challengers=None,
                                                    chooser=None)
        self.assertIsNone(config)
        # This finally gives a runnable configuration
        config, _ = intensifier.get_next_challenger(challengers=[self.config2],
                                                    chooser=None)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )
        self.assertEqual(intensifier.n_iters, 3)
        self.assertEqual(intensifier.num_chall_run, 1)
Example #18
class TestIntensify(unittest.TestCase):
    def setUp(self):
        unittest.TestCase.setUp(self)

        self.rh = RunHistory()
        self.cs = get_config_space()
        self.config1 = Configuration(self.cs, values={'a': 0, 'b': 100})
        self.config2 = Configuration(self.cs, values={'a': 100, 'b': 0})
        self.config3 = Configuration(self.cs, values={'a': 100, 'b': 100})

        self.scen = Scenario({
            "cutoff_time": 2,
            'cs': self.cs,
            "run_obj": 'runtime',
            "output_dir": ''
        })
        self.stats = Stats(scenario=self.scen)
        self.stats.start_timing()

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

    def test_race_challenger(self):
        """
           test _race_challenger without adaptive capping
        """
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  run_obj_time=False)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)
        intensifier.N = 1

        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh)

        self.assertEqual(inc, self.config2)

    def test_race_challenger_2(self):
        """
           test _race_challenger with adaptive capping
        """
        def target(x):
            time.sleep(1.5)
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats, run_obj="runtime")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=.001,
                    time=0.001,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=12345,
                    additional_info=None)
        intensifier.N = 1

        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        inc = intensifier._race_challenger(
            challenger=self.config2,
            incumbent=self.config1,
            run_history=self.rh,
        )

        self.assertEqual(inc, self.config1)

    def test_race_challenger_3(self):
        """
           test _race_challenger with adaptive capping on a previously capped configuration
        """
        def target(config: Configuration, seed: int, instance: str):
            if instance == 1:
                time.sleep(2.1)
            else:
                time.sleep(0.6)
            return (config['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="runtime",
                                par_factor=1)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  cutoff=2,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=0.5,
                    time=.5,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=12345,
                    additional_info=None)

        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config3], chooser=None)
        inc = intensifier._race_challenger(
            challenger=config,
            incumbent=self.config1,
            run_history=self.rh,
        )
        self.assertEqual(inc, self.config1)

        # further run for incumbent
        self.rh.add(config=self.config1,
                    cost=2,
                    time=2,
                    status=StatusType.TIMEOUT,
                    instance_id=2,
                    seed=12345,
                    additional_info=None)

        # give config2 a second chance - now it should run on both instances

        # run on instance 1
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config3], chooser=None)
        inc = intensifier._race_challenger(
            challenger=config,
            incumbent=self.config1,
            run_history=self.rh,
        )

        # run on instance 2
        config, _ = intensifier.get_next_challenger(challengers=[self.config3],
                                                    chooser=None)
        self.assertEqual(config, self.config2)
        self.assertTrue(intensifier.continue_challenger)

        inc = intensifier._race_challenger(
            challenger=config,
            incumbent=self.config1,
            run_history=self.rh,
        )

        # the incumbent should still be config1 because
        # config2 gets a full timeout on instance 1,
        # so that c(config1) = 1.25 and c(config2) is close to 1.3
        self.assertEqual(inc, self.config1)
        # the capped run should not be counted in num_runs_per_config
        self.assertAlmostEqual(self.rh.num_runs_per_config[2], 2)
        self.assertFalse(intensifier.continue_challenger)

    def test_race_challenger_large(self):
        """
           test _race_challenger using solution_quality
        """
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(10)),
                                  run_obj_time=False,
                                  deterministic=True)

        for i in range(10):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=12345,
                        additional_info=None)

        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # ties on the first instance; afterwards the challenger should always
        # win and be returned as inc
        while True:
            config, _ = intensifier.get_next_challenger(
                challengers=[self.config2, self.config3], chooser=None)
            inc = intensifier._race_challenger(
                challenger=config,
                incumbent=self.config1,
                run_history=self.rh,
            )

            # stop when challenger evaluation is over
            if not intensifier.stage == IntensifierStage.RUN_CHALLENGER:
                break

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1)

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2,
                                           only_max_observed_budget=True)
        self.assertEqual(len(runs), 10)

    def test_race_challenger_large_blocked_seed(self):
        """
           test _race_challenger whether seeds are blocked for challenger runs
        """
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(10)),
                                  run_obj_time=False,
                                  deterministic=False)

        for i in range(10):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)

        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # ties on the first instance; afterwards the challenger should always
        # win and be returned as inc
        while True:
            config, _ = intensifier.get_next_challenger(
                challengers=[self.config2, self.config3], chooser=None)
            inc = intensifier._race_challenger(
                challenger=config,
                incumbent=self.config1,
                run_history=self.rh,
            )

            # stop when challenger evaluation is over
            if not intensifier.stage == IntensifierStage.RUN_CHALLENGER:
                break

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1)

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2,
                                           only_max_observed_budget=True)
        self.assertEqual(len(runs), 10)

        seeds = sorted([r.seed for r in runs])
        self.assertEqual(seeds, list(range(10)), seeds)
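
        # Note: the challenger reuses exactly the incumbent's seeds (0..9),
        # i.e. seeds are "blocked" so that incumbent and challenger are
        # compared on identical (instance, seed) pairs.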

    def test_add_inc_run_det(self):
        """
            test _add_inc_run()
        """
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="solution_quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        # since we assume deterministic=True,
        # the second call should not add any more runs
        # given only one instance
        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

    def test_add_inc_run_nondet(self):
        """
            test _add_inc_run()
        """
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                runhistory=self.rh,
                                run_obj="solution_quality")

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1, 2],
                                  deterministic=False)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 2, self.rh.data)
        runs = self.rh.get_runs_for_config(config=self.config1,
                                           only_max_observed_budget=True)
        # exactly one run on each instance
        self.assertIn(1, [runs[0].instance, runs[1].instance])
        self.assertIn(2, [runs[0].instance, runs[1].instance])

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 3, self.rh.data)

    def test_get_next_challenger(self):
        """
            test get_next_challenger()
        """
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)

        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # get a new challenger to evaluate
        config, new = intensifier.get_next_challenger(
            challengers=[self.config1, self.config2], chooser=None)

        self.assertEqual(config, self.config1, intensifier.current_challenger)
        self.assertEqual(intensifier._chall_indx, 1)
        self.assertEqual(intensifier.N, 1)
        self.assertTrue(new)

        # when already evaluating a challenger, return the same challenger
        intensifier.to_run = [(1, 1, 0)]
        config, new = intensifier.get_next_challenger(
            challengers=[self.config2], chooser=None)
        self.assertEqual(config, self.config1, intensifier.current_challenger)
        self.assertEqual(intensifier._chall_indx, 1)
        self.assertFalse(new)

    def test_generate_challenger(self):
        """
            test generate_challenger()
        """
        # test get generator from a list of challengers
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=None,
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)

        gen = intensifier._generate_challengers(
            challengers=[self.config1, self.config2], chooser=None)

        self.assertEqual(next(gen), self.config1)
        self.assertEqual(next(gen), self.config2)
        self.assertRaises(StopIteration, next, gen)

        # test get generator from a chooser - would return only 1 configuration
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=None,
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)
        chooser = SMAC4AC(self.scen, rng=1).solver.epm_chooser

        gen = intensifier._generate_challengers(challengers=None,
                                                chooser=chooser)

        self.assertEqual(next(gen).get_dictionary(), {'a': 24, 'b': 68})
        self.assertRaises(StopIteration, next, gen)

        # when both are None, raise an error
        with self.assertRaisesRegex(ValueError,
                                    "No configurations/chooser provided"):
            intensifier._generate_challengers(challengers=None, chooser=None)

    def test_eval_challenger(self):
        """
            test eval_challenger() - a complete intensification run
        """
        def target(x):
            return x['a']

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  run_obj_time=False,
                                  deterministic=False,
                                  always_race_against=self.config3,
                                  run_limit=1)

        # run incumbent first if it was not run before
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config1, self.config3],
            chooser=None)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=None,
            run_history=self.rh,
        )

        self.assertEqual(inc, self.config2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)

        # run challenger now that the incumbent has been executed
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config1, self.config3],
            chooser=None)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )

        # challenger should have a better performance, so incumbent should have changed
        self.assertEqual(inc, self.config1)
        self.assertEqual(self.stats.inc_changed, 1)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_DEFAULT)
        self.assertFalse(intensifier.continue_challenger)

        # run `always_race_against` now since the incumbent has changed
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config1, self.config3],
            chooser=None)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )

        self.assertEqual(inc, self.config1)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config3,
                                            only_max_observed_budget=True)), 1)
        self.assertEqual(intensifier.n_iters, 1)
        self.assertIsInstance(intensifier.configs_to_run,
                              collections.abc.Iterator)
        with self.assertRaises(StopIteration):
            next(intensifier.configs_to_run)
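
        # Stage cycle exercised by this test (inferred from the assertions
        # above): RUN_INCUMBENT -> RUN_CHALLENGER -> RUN_DEFAULT (entered only
        # because the incumbent changed and `always_race_against` is set) ->
        # back to RUN_INCUMBENT for the next iteration.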
Beispiel #19
0
    def _get_incumbent(self,
                       challenger: Configuration,
                       incumbent: typing.Optional[Configuration],
                       run_history: RunHistory,
                       log_traj: bool = True) -> Configuration:
        """
        Compares the challenger with the current incumbent and returns the better configuration

        Parameters
        ----------
        challenger : Configuration
            promising configuration
        incumbent : Configuration
            best configuration so far
        run_history : smac.runhistory.runhistory.RunHistory
            stores all runs we ran so far
        log_traj : bool
            whether to log changes of incumbents in trajectory

        Returns
        -------
        Configuration
            the better of challenger and incumbent
        """
        # compare challenger with current incumbent
        if incumbent is None:  # first intensify run from initial design
            new_incumbent = challenger
            inc_perf = run_history.get_cost(new_incumbent)
            self.logger.info("First Incumbent found! Cost of incumbent is (%.4f)" % inc_perf)
            self.logger.info("incumbent configuration: %s" % str(challenger))
            if log_traj:
                # adding incumbent entry
                self.stats.inc_changed += 1  # first incumbent
                self.traj_logger.add_entry(train_perf=inc_perf,
                                           incumbent_id=self.stats.inc_changed,
                                           incumbent=new_incumbent)

        elif self.instance_as_budget:
            new_incumbent = self._compare_configs(incumbent, challenger,
                                                  run_history, log_traj)
            # if _compare_configs returned None, the comparison is undecided; keep the old incumbent
            new_incumbent = incumbent if new_incumbent is None else new_incumbent
        else:
            inc_runs = run_history.get_runs_for_config(incumbent, only_max_observed_budget=True)
            chall_runs = run_history.get_runs_for_config(challenger, only_max_observed_budget=True)
            if len(inc_runs) > 1:
                raise ValueError(
                    'Number of incumbent runs on budget %f must not exceed 1, but is %d'
                    % (inc_runs[0].budget, len(inc_runs))
                )
            if len(chall_runs) > 1:
                raise ValueError(
                    'Number of challenger runs on budget %f must not exceed 1, but is %d'
                    % (chall_runs[0].budget, len(chall_runs))
                )
            inc_run = inc_runs[0]
            chall_run = chall_runs[0]
            if inc_run.budget > chall_run.budget:
                self.logger.debug('Incumbent evaluated on higher budget than challenger (%.4f > %.4f), '
                                  'not changing the incumbent',
                                  inc_run.budget, chall_run.budget)
                new_incumbent = incumbent
            elif inc_run.budget < chall_run.budget:
                self.logger.debug('Challenger evaluated on higher budget than incumbent (%.4f > %.4f), '
                                  'changing the incumbent',
                                  chall_run.budget, inc_run.budget)
                new_incumbent = challenger
                if log_traj:
                    # adding incumbent entry
                    self.stats.inc_changed += 1
                    new_inc_cost = run_history.get_cost(new_incumbent)
                    self.traj_logger.add_entry(train_perf=new_inc_cost,
                                               incumbent_id=self.stats.inc_changed,
                                               incumbent=new_incumbent)
            else:
                chall_cost = run_history.get_cost(challenger)
                inc_cost = run_history.get_cost(incumbent)
                if chall_cost < inc_cost:
                    self.logger.info("Challenger (%.4f) is better than incumbent (%.4f) on budget %.4f.",
                                     chall_cost, inc_cost, chall_run.budget)
                    self._log_incumbent_changes(incumbent, challenger)
                    new_incumbent = challenger
                    if log_traj:
                        # adding incumbent entry
                        self.stats.inc_changed += 1
                        self.traj_logger.add_entry(train_perf=chall_cost,
                                                   incumbent_id=self.stats.inc_changed,
                                                   incumbent=new_incumbent)
                else:
                    self.logger.debug("Incumbent (%.4f) is at least as good as the challenger (%.4f) on budget %.4f.",
                                      inc_cost, chall_cost, inc_run.budget)
                    new_incumbent = incumbent

        return new_incumbent
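
    # Decision rule of the final (real-budget) branch above, distilled into a
    # standalone sketch (illustrative only; budgets/costs mirror the
    # inc_run/chall_run fields used above):
    #
    #     def pick_incumbent_sketch(inc_budget, chall_budget, inc_cost,
    #                               chall_cost):
    #         if inc_budget != chall_budget:
    #             # whichever configuration ran on the higher budget wins
    #             return 'incumbent' if inc_budget > chall_budget else 'challenger'
    #         # on equal budgets the lower cost wins (ties keep the incumbent)
    #         return 'challenger' if chall_cost < inc_cost else 'incumbent'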
Beispiel #20
0
    def eval_challenger(self,
                        challenger: Configuration,
                        incumbent: typing.Optional[Configuration],
                        run_history: RunHistory,
                        time_bound: float = float(MAXINT),
                        log_traj: bool = True) -> typing.Tuple[Configuration, float]:
        """
        Running intensification via successive halving to determine the incumbent configuration.
        *Side effect:* adds runs to run_history

        Parameters
        ----------
        challenger : Configuration
            promising configuration
        incumbent : typing.Optional[Configuration]
            best configuration so far, None in 1st run
        run_history : smac.runhistory.runhistory.RunHistory
            stores all runs we ran so far
        time_bound : float, optional (default=2 ** 31 - 1)
            time in [sec] available to perform intensify
        log_traj : bool
            whether to log changes of incumbents in trajectory

        Returns
        -------
        typing.Tuple[Configuration, float]
            incumbent and incumbent cost
        """
        # calculating the incumbent's performance for adaptive capping
        # this check is required because:
        #   - there is no incumbent performance for the first ever 'intensify' run (from initial design)
        #   - during the 1st intensify run, the incumbent shouldn't be capped after being compared against itself
        if incumbent and incumbent != challenger:
            inc_runs = run_history.get_runs_for_config(incumbent, only_max_observed_budget=True)
            inc_sum_cost = run_history.sum_cost(config=incumbent, instance_seed_budget_keys=inc_runs)
        else:
            inc_sum_cost = np.inf
            if self.first_run:
                self.logger.info("First run, no incumbent provided; challenger is assumed to be the incumbent")
                incumbent = challenger
                self.first_run = False

        # select which instance to run current config on
        curr_budget = self.all_budgets[self.stage]

        # selecting instance-seed subset for this budget, depending on the kind of budget
        if self.instance_as_budget:
            prev_budget = int(self.all_budgets[self.stage - 1]) if self.stage > 0 else 0
            curr_insts = self.inst_seed_pairs[int(prev_budget):int(curr_budget)]
        else:
            curr_insts = self.inst_seed_pairs
        n_insts_remaining = len(curr_insts) - self.curr_inst_idx - 1

        self.logger.debug(" Running challenger  -  %s" % str(challenger))

        # run the next instance-seed pair for the given configuration
        instance, seed = curr_insts[self.curr_inst_idx]

        # selecting cutoff if running adaptive capping
        cutoff = self.cutoff
        if self.run_obj_time:
            cutoff = self._adapt_cutoff(challenger=challenger,
                                        run_history=run_history,
                                        inc_sum_cost=inc_sum_cost)
            if cutoff is not None and cutoff <= 0:
                # ran out of time to validate challenger
                self.logger.debug("Stop challenger intensification due to adaptive capping.")
                self.curr_inst_idx = np.inf

        self.logger.debug('Cutoff for challenger: %s' % str(cutoff))

        try:
            # run target algorithm for each instance-seed pair
            self.logger.debug("Execute target algorithm")

            try:
                status, cost, dur, res = self.tae_runner.start(
                    config=challenger,
                    instance=instance,
                    seed=seed,
                    cutoff=cutoff,
                    budget=0.0 if self.instance_as_budget else curr_budget,
                    instance_specific=self.instance_specifics.get(instance, "0"),
                    # Cutoff might be None if self.cutoff is None, but then the first if statement prevents
                    # evaluation of the second if statement
                    capped=(self.cutoff is not None) and (cutoff < self.cutoff)  # type: ignore[operator] # noqa F821
                )
                self._ta_time += dur
                self.num_run += 1
                self.curr_inst_idx += 1

            except CappedRunException:
                # We move on to the next configuration if a configuration is capped
                self.logger.debug("Budget exhausted by adaptive capping; "
                                  "Interrupting current challenger and moving on to the next one")
                # ignore all pending instances
                self.curr_inst_idx = np.inf
                n_insts_remaining = 0
                status = StatusType.CAPPED

            # book-keeping for the current stage:
            #  - SUCCESS / DONOTADVANCE runs go to success_challengers
            #  - CAPPED / CRASHED / TIMEOUT / MEMOUT runs go to fail_challengers
            # both are sets, so each configuration is only counted once
            if np.isfinite(self.curr_inst_idx) and status in [StatusType.SUCCESS, StatusType.DONOTADVANCE]:
                self.success_challengers.add(challenger)  # successful configs
            else:
                self.fail_challengers.add(challenger)  # capped/crashed/do not advance configs

            # get incumbent if all instances have been evaluated
            if n_insts_remaining <= 0:
                incumbent = self._compare_configs(challenger=challenger,
                                                  incumbent=incumbent,
                                                  run_history=run_history,
                                                  log_traj=log_traj)
        except BudgetExhaustedException:
            # Returning the final incumbent selected so far because we ran out of optimization budget
            self.logger.debug("Budget exhausted; "
                              "Interrupting optimization run and returning current incumbent")

        # if all configurations for the current stage have been evaluated, reset stage
        num_chal_evaluated = len(self.success_challengers.union(self.fail_challengers)) + self.fail_chal_offset
        if num_chal_evaluated == self.n_configs_in_stage[self.stage] and n_insts_remaining <= 0:

            self.logger.info('Successive Halving iteration-step: %d-%d with '
                             'budget [%.2f / %d] - evaluated %d challenger(s)' %
                             (self.sh_iters + 1, self.stage + 1, self.all_budgets[self.stage], self.max_budget,
                              self.n_configs_in_stage[self.stage]))

            self._update_stage(run_history=run_history)

        # get incumbent cost
        inc_perf = run_history.get_cost(incumbent)

        return incumbent, inc_perf
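
    # How the per-stage instance subset is selected when instances act as
    # budgets (standalone sketch; budgets are cumulative counts of
    # instance-seed pairs, mirroring all_budgets / inst_seed_pairs above):
    #
    #     all_budgets = [1, 3, 9]
    #     inst_seed_pairs = [(i, 0) for i in range(9)]
    #     stage = 1
    #     prev_budget = int(all_budgets[stage - 1]) if stage > 0 else 0
    #     curr_insts = inst_seed_pairs[prev_budget:int(all_budgets[stage])]
    #     assert curr_insts == [(1, 0), (2, 0)]  # pairs added in this stage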
Beispiel #21
0
class TestIntensify(unittest.TestCase):
    def setUp(self):
        unittest.TestCase.setUp(self)

        self.rh = RunHistory()
        self.cs = get_config_space()
        self.config1 = Configuration(self.cs, values={"a": 0, "b": 100})
        self.config2 = Configuration(self.cs, values={"a": 100, "b": 0})
        self.config3 = Configuration(self.cs, values={"a": 100, "b": 100})

        self.scen = Scenario({
            "cutoff_time": 2,
            "cs": self.cs,
            "run_obj": "runtime",
            "output_dir": "",
            "deterministic": False,
            "limit_resources": True,
        })
        self.stats = Stats(scenario=self.scen)
        self.stats.start_timing()

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

    def test_race_challenger_1(self):
        """
        Makes sure that a racing configuration with better performance
        is selected as incumbent.
        No adaptive capping.
        """
        def target(x):
            return (x["a"] + 1) / 1000.0

        taf = ExecuteTAFuncDict(use_pynisher=False,
                                ta=target,
                                stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            instances=[1],
            run_obj_time=False,
        )

        self.rh.add(
            config=self.config1,
            cost=1,
            time=1,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=None,
            additional_info=None,
        )

        intensifier.N = 1
        inc, instance, seed, cutoff = intensifier._get_next_racer(
            challenger=self.config2,
            incumbent=self.config1,
            run_history=self.rh)

        run_info = RunInfo(
            config=self.config2,
            instance=instance,
            instance_specific="0",
            cutoff=cutoff,
            seed=seed,
            capped=False,
            budget=0.0,
        )

        result = eval_challenger(run_info, taf, self.stats, self.rh)

        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=self.config1,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        self.assertEqual(inc, self.config2)
        self.assertEqual(intensifier.num_run, 1)
        self.assertEqual(intensifier.num_chall_run, 1)
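
        # Note the ask-run-tell split used throughout this class:
        # _get_next_racer proposes (instance, seed, cutoff), the caller wraps
        # them in a RunInfo and executes it via eval_challenger, and
        # process_results feeds the outcome back to update the incumbent.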

    def test_race_challenger_2(self):
        """
        Makes sure that a racing configuration with better performance,
        if capped, doesn't replace the incumbent.
        """
        def target(x):
            time.sleep(1.5)
            return (x["a"] + 1) / 1000.0

        taf = ExecuteTAFuncDict(use_pynisher=False,
                                ta=target,
                                stats=self.stats,
                                run_obj="runtime")
        taf.runhistory = self.rh

        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            instances=[1],
        )

        self.rh.add(
            config=self.config1,
            cost=0.001,
            time=0.001,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=12345,
            additional_info=None,
        )
        intensifier.N = 1
        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        inc, instance, seed, cutoff = intensifier._get_next_racer(
            challenger=self.config2,
            incumbent=self.config1,
            run_history=self.rh)
        run_info = RunInfo(
            config=self.config2,
            instance=instance,
            instance_specific="0",
            seed=seed,
            cutoff=cutoff,
            capped=True,
            budget=0.0,
        )

        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=self.config1,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        self.assertEqual(inc, self.config1)
        self.assertEqual(intensifier.num_run, 1)
        self.assertEqual(intensifier.num_chall_run, 1)

    def test_race_challenger_3(self):
        """
        test _race_challenger with adaptive capping on a previously capped configuration
        """
        def target(config: Configuration, seed: int, instance: int):
            if instance == 1:
                time.sleep(2.1)
            else:
                time.sleep(0.6)
            return (config["a"] + 1) / 1000.0

        taf = ExecuteTAFuncDict(
            use_pynisher=False,
            ta=target,
            stats=self.stats,
            run_obj="runtime",
            par_factor=1,
        )
        taf.runhistory = self.rh

        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            cutoff=2,
            instances=[1],
        )

        self.rh.add(
            config=self.config1,
            cost=0.5,
            time=0.5,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=12345,
            additional_info=None,
        )

        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config3], chooser=None)
        inc, instance, seed, cutoff = intensifier._get_next_racer(
            challenger=config, incumbent=self.config1, run_history=self.rh)
        run_info = RunInfo(
            config=config,
            instance=instance,
            instance_specific="0",
            seed=seed,
            cutoff=cutoff,
            capped=True,
            budget=0.0,
        )
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=self.config1,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        self.assertEqual(inc, self.config1)

        # further run for incumbent
        self.rh.add(
            config=self.config1,
            cost=2,
            time=2,
            status=StatusType.TIMEOUT,
            instance_id=2,
            seed=12345,
            additional_info=None,
        )

        # give config2 a second chance - now it should run on both instances

        # run on instance 1
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config3], chooser=None)
        inc, instance, seed, cutoff = intensifier._get_next_racer(
            challenger=config, incumbent=self.config1, run_history=self.rh)
        run_info = RunInfo(
            config=config,
            instance=instance,
            instance_specific="0",
            seed=seed,
            cutoff=cutoff,
            capped=False,
            budget=0.0,
        )
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=self.config1,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        # run on instance 2
        config, _ = intensifier.get_next_challenger(challengers=[self.config3],
                                                    chooser=None)
        self.assertEqual(config, self.config2)
        self.assertTrue(intensifier.continue_challenger)

        inc, instance, seed, cutoff = intensifier._get_next_racer(
            challenger=config, incumbent=self.config1, run_history=self.rh)
        run_info = RunInfo(
            config=config,
            instance=instance,
            instance_specific="0",
            seed=seed,
            cutoff=cutoff,
            capped=False,
            budget=0.0,
        )

        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=self.config1,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        # the incumbent should still be config1 because
        # config2 gets a full timeout on instance 1,
        # so that c(config1) = 1.25 and c(config2) is close to 1.3
        self.assertEqual(inc, self.config1)
        # the capped run should not be counted in num_runs_per_config
        self.assertAlmostEqual(self.rh.num_runs_per_config[2], 2)
        self.assertFalse(intensifier.continue_challenger)

        self.assertEqual(intensifier.num_run, 3)
        self.assertEqual(intensifier.num_chall_run, 3)

    def test_race_challenger_large(self):
        """
        test _race_challenger using solution_quality
        """
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(use_pynisher=False,
                                ta=target,
                                stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            instances=list(range(10)),
            run_obj_time=False,
            deterministic=True,
        )

        for i in range(10):
            self.rh.add(
                config=self.config1,
                cost=i + 1,
                time=1,
                status=StatusType.SUCCESS,
                instance_id=i,
                seed=12345,
                additional_info=None,
            )

        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # ties on the first instance; afterwards the challenger should always
        # win and be returned as inc
        while True:
            if intensifier.continue_challenger:
                config = intensifier.current_challenger
            else:
                config, _ = intensifier.get_next_challenger(
                    challengers=[self.config2, self.config3], chooser=None)
            inc, instance, seed, cutoff = intensifier._get_next_racer(
                challenger=config, incumbent=self.config1, run_history=self.rh)
            run_info = RunInfo(
                config=config,
                instance=instance,
                instance_specific="0",
                seed=seed,
                cutoff=cutoff,
                capped=False,
                budget=0.0,
            )

            result = eval_challenger(run_info, taf, self.stats, self.rh)
            inc, perf = intensifier.process_results(
                run_info=run_info,
                incumbent=self.config1,
                run_history=self.rh,
                time_bound=np.inf,
                result=result,
            )

            # stop when challenger evaluation is over
            if not intensifier.stage == IntensifierStage.RUN_CHALLENGER:
                break

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1)

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2,
                                           only_max_observed_budget=True)
        self.assertEqual(len(runs), 10)

        self.assertEqual(intensifier.num_run, 10)
        self.assertEqual(intensifier.num_chall_run, 10)

    def test_race_challenger_large_blocked_seed(self):
        """
        test _race_challenger whether seeds are blocked for challenger runs
        """
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(use_pynisher=False,
                                ta=target,
                                stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            instances=list(range(10)),
            run_obj_time=False,
            deterministic=False,
        )

        for i in range(10):
            self.rh.add(
                config=self.config1,
                cost=i + 1,
                time=1,
                status=StatusType.SUCCESS,
                instance_id=i,
                seed=i,
                additional_info=None,
            )

        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # ties on the first instance; afterwards the challenger should always
        # win and be returned as inc
        while True:
            if intensifier.continue_challenger:
                config = intensifier.current_challenger
            else:
                config, _ = intensifier.get_next_challenger(
                    challengers=[self.config2, self.config3], chooser=None)
            inc, instance, seed, cutoff = intensifier._get_next_racer(
                challenger=config, incumbent=self.config1, run_history=self.rh)
            run_info = RunInfo(
                config=config,
                instance=instance,
                instance_specific="0",
                seed=seed,
                cutoff=cutoff,
                capped=False,
                budget=0.0,
            )
            result = eval_challenger(run_info, taf, self.stats, self.rh)
            inc, perf = intensifier.process_results(
                run_info=run_info,
                incumbent=self.config1,
                run_history=self.rh,
                time_bound=np.inf,
                result=result,
            )

            # stop when challenger evaluation is over
            if not intensifier.stage == IntensifierStage.RUN_CHALLENGER:
                break

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1)

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2,
                                           only_max_observed_budget=True)
        self.assertEqual(len(runs), 10)

        seeds = sorted([r.seed for r in runs])
        self.assertEqual(seeds, list(range(10)), seeds)

        self.assertEqual(intensifier.num_run, 10)
        self.assertEqual(intensifier.num_chall_run, 10)

    def test_add_inc_run_det(self):
        """
        test _add_inc_run()
        """
        def target(x):
            return (x["a"] + 1) / 1000.0

        taf = ExecuteTAFuncDict(use_pynisher=False,
                                ta=target,
                                stats=self.stats,
                                run_obj="solution_quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            instances=[1],
            deterministic=True,
        )

        instance, seed, cutoff = intensifier._get_next_inc_run(
            available_insts=intensifier._get_inc_available_inst(
                incumbent=self.config1, run_history=self.rh))
        run_info = RunInfo(
            config=self.config1,
            instance=instance,
            instance_specific="0",
            seed=seed,
            cutoff=cutoff,
            capped=False,
            budget=0.0,
        )
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        intensifier.stage = IntensifierStage.PROCESS_FIRST_CONFIG_RUN
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=self.config1,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        # since we assume deterministic=True,
        # the second call should not add any more runs
        # given only one instance: no available instance remains,
        # so no new run is launched
        available_insts = intensifier._get_inc_available_inst(
            incumbent=self.config1, run_history=self.rh)
        # Make sure that the list is empty, and hence no new call
        # of incumbent will be triggered
        self.assertFalse(available_insts)

        # The following two counters evaluate to zero because _next_iteration
        # (which resets them) is triggered by _add_inc_run,
        # as it is the first evaluation of this intensifier
        # After the above incumbent run, the stage is
        # IntensifierStage.RUN_CHALLENGER. Change it to test next iteration
        intensifier.stage = IntensifierStage.PROCESS_FIRST_CONFIG_RUN
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=None,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )
        self.assertEqual(intensifier.num_run, 0)
        self.assertEqual(intensifier.num_chall_run, 0)

    def test_add_inc_run_nondet(self):
        """
        test _add_inc_run()
        """
        def target(x):
            return (x["a"] + 1) / 1000.0

        taf = ExecuteTAFuncDict(use_pynisher=False,
                                ta=target,
                                stats=self.stats,
                                run_obj="solution_quality")

        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            instances=[1, 2],
            deterministic=False,
        )

        instance, seed, cutoff = intensifier._get_next_inc_run(
            available_insts=intensifier._get_inc_available_inst(
                incumbent=self.config1, run_history=self.rh))
        run_info = RunInfo(
            config=self.config1,
            instance=instance,
            instance_specific="0",
            seed=seed,
            cutoff=cutoff,
            capped=False,
            budget=0.0,
        )
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=self.config1,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        instance, seed, cutoff = intensifier._get_next_inc_run(
            available_insts=intensifier._get_inc_available_inst(
                incumbent=self.config1, run_history=self.rh))
        run_info = RunInfo(
            config=self.config1,
            instance=instance,
            instance_specific="0",
            seed=seed,
            cutoff=cutoff,
            capped=False,
            budget=0.0,
        )
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=self.config1,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )
        self.assertEqual(len(self.rh.data), 2, self.rh.data)
        runs = self.rh.get_runs_for_config(config=self.config1,
                                           only_max_observed_budget=True)
        # exactly one run on each instance
        self.assertIn(1, [runs[0].instance, runs[1].instance])
        self.assertIn(2, [runs[0].instance, runs[1].instance])

        instance, seed, cutoff = intensifier._get_next_inc_run(
            available_insts=intensifier._get_inc_available_inst(
                incumbent=self.config1, run_history=self.rh))
        run_info = RunInfo(
            config=self.config1,
            instance=instance,
            instance_specific="0",
            seed=seed,
            cutoff=cutoff,
            capped=False,
            budget=0.0,
        )
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=self.config1,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )
        self.assertEqual(len(self.rh.data), 3, self.rh.data)

        # Three runs were added to the runhistory, but the first one triggered
        # _next_iteration (which resets the counters), so num_run is 2.
        # No further _next_iteration call, as an incumbent is provided
        self.assertEqual(intensifier.num_run, 2)
        self.assertEqual(intensifier.num_chall_run, 0)

    def test_get_next_challenger(self):
        """
        test get_next_challenger()
        """
        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            instances=[1],
            deterministic=True,
        )

        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # get a new challenger to evaluate
        config, new = intensifier.get_next_challenger(
            challengers=[self.config1, self.config2], chooser=None)

        self.assertEqual(config, self.config1, intensifier.current_challenger)
        self.assertEqual(intensifier._chall_indx, 1)
        self.assertEqual(intensifier.N, 1)
        self.assertTrue(new)

        # when already evaluating a challenger, return the same challenger
        intensifier.to_run = [(1, 1, 0)]
        config, new = intensifier.get_next_challenger(
            challengers=[self.config2], chooser=None)
        self.assertEqual(config, self.config1, intensifier.current_challenger)
        self.assertEqual(intensifier._chall_indx, 1)
        self.assertFalse(new)

    def test_generate_challenger(self):
        """
        test generate_challenger()
        """
        # test get generator from a list of challengers
        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=None,
            rng=np.random.RandomState(12345),
            instances=[1],
            deterministic=True,
        )

        gen = intensifier._generate_challengers(
            challengers=[self.config1, self.config2], chooser=None)

        self.assertEqual(next(gen), self.config1)
        self.assertEqual(next(gen), self.config2)
        self.assertRaises(StopIteration, next, gen)

        # test get generator from a chooser - would return only 1 configuration
        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=None,
            rng=np.random.RandomState(12345),
            instances=[1],
            deterministic=True,
        )
        chooser = SMAC4AC(self.scen, rng=1).solver.epm_chooser

        gen = intensifier._generate_challengers(challengers=None,
                                                chooser=chooser)

        self.assertEqual(next(gen).get_dictionary(), {"a": 24, "b": 68})
        self.assertRaises(StopIteration, next, gen)

        # when both are None, raise an error
        with self.assertRaisesRegex(ValueError,
                                    "No configurations/chooser provided"):
            intensifier._generate_challengers(challengers=None, chooser=None)

    def test_eval_challenger_1(self):
        """
        test eval_challenger() - a complete intensification run with a `always_race_against` configuration
        """

        def target(x):
            if x["a"] == 100:
                time.sleep(1)
            return x["a"]

        taf = ExecuteTAFuncDict(use_pynisher=False,
                                ta=target,
                                stats=self.stats,
                                run_obj="runtime")
        taf.runhistory = self.rh

        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            instances=[1, 2],
            run_obj_time=True,
            cutoff=2,
            deterministic=False,
            always_race_against=self.config3,
            run_limit=1,
        )

        self.assertEqual(intensifier.n_iters, 0)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_FIRST_CONFIG)

        # intensification iteration #1
        # run first config as incumbent if incumbent is None
        intent, run_info = intensifier.get_next_run(
            incumbent=None,
            run_history=self.rh,
            challengers=[self.config2],
            chooser=None,
        )
        self.assertEqual(run_info.config, self.config2)
        self.assertEqual(intensifier.stage,
                         IntensifierStage.PROCESS_FIRST_CONFIG_RUN)
        # eval config 2 (=first run)
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=None,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        self.assertEqual(inc, self.config2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        self.assertEqual(self.stats.inc_changed, 1)
        self.assertEqual(intensifier.n_iters,
                         1)  # 1 intensification run complete!

        # intensification iteration #2
        # regular intensification begins - run incumbent first
        intent, run_info = intensifier.get_next_run(
            challengers=None,  # don't need a new list here as old one is cont'd
            incumbent=inc,
            run_history=self.rh,
            chooser=None,
        )
        self.assertEqual(run_info.config, inc)
        self.assertEqual(intensifier.stage,
                         IntensifierStage.PROCESS_INCUMBENT_RUN)
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=inc,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertEqual(self.stats.inc_changed, 1)

        # run challenger now that the incumbent has been executed
        intent, run_info = intensifier.get_next_run(challengers=[self.config1],
                                                    incumbent=inc,
                                                    run_history=self.rh,
                                                    chooser=None)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertEqual(run_info.config, self.config1)
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=inc,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        # challenger has a better performance, but has not been run on all
        # instances yet, so the incumbent stays the same
        self.assertEqual(inc, self.config2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertTrue(intensifier.continue_challenger)

        # run challenger again on the other instance
        intent, run_info = intensifier.get_next_run(
            challengers=None,  # don't need a new list here as old one is cont'd
            incumbent=inc,
            run_history=self.rh,
            chooser=None,
        )
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertEqual(run_info.config, self.config1)
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=inc,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        # challenger is better than the incumbent on both instances,
        # so the incumbent changed
        self.assertEqual(inc, self.config1)
        self.assertEqual(self.stats.inc_changed, 2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_BASIS)
        self.assertFalse(intensifier.continue_challenger)

        # run basis configuration (`always_race_against`)
        intent, run_info = intensifier.get_next_run(
            challengers=None,  # don't need a new list here as old one is cont'd
            incumbent=inc,
            run_history=self.rh,
            chooser=None,
        )
        self.assertEqual(run_info.config, self.config3)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_BASIS)
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=inc,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        # the basis configuration (config3) is not better than the incumbent,
        # so we can move on
        self.assertEqual(inc, self.config1)
        self.assertEqual(self.stats.inc_changed, 2)
        self.assertEqual(intensifier.stage,
                         IntensifierStage.RUN_INCUMBENT,
                         msg=self.rh.data.items())
        self.assertEqual(list(self.rh.data.values())[4][2], StatusType.CAPPED)
        self.assertEqual(
            intensifier.n_iters,
            1)  # iteration continues as `min_chall` condition is not met
        self.assertIsInstance(intensifier.configs_to_run,
                              collections.abc.Iterator)
        # no configs should be left at the end
        with self.assertRaises(StopIteration):
            next(intensifier.configs_to_run)

        # intensification continues running incumbent again in same iteration...
        intent, run_info = intensifier.get_next_run(
            challengers=None,  # don't need a new list here as old one is cont'd
            incumbent=inc,
            run_history=self.rh,
            chooser=None,
        )
        self.assertEqual(intensifier.stage,
                         IntensifierStage.PROCESS_INCUMBENT_RUN)
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=inc,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        self.assertEqual(inc, self.config1)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)

        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config1,
                                            only_max_observed_budget=True)),
            3,
        )
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config2,
                                            only_max_observed_budget=True)),
            2,
        )
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config3,
                                            only_max_observed_budget=True)),
            0,
        )  # capped

    def test_eval_challenger_2(self):
        """
        test eval_challenger() - a complete intensification run without an `always_race_against` configuration
        """
        def target(x):
            return 2 * x["a"] + x["b"]

        taf = ExecuteTAFuncDict(use_pynisher=False,
                                ta=target,
                                stats=self.stats,
                                run_obj="quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            instances=[1],
            run_obj_time=False,
            deterministic=True,
            always_race_against=None,
            run_limit=1,
        )

        self.assertEqual(intensifier.n_iters, 0)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_FIRST_CONFIG)

        # intensification iteration #1
        # run first config as incumbent if incumbent is None
        intent, run_info = intensifier.get_next_run(
            challengers=[self.config3],
            incumbent=None,
            run_history=self.rh,
            chooser=None,
        )
        self.assertEqual(run_info.config, self.config3)
        self.assertEqual(intensifier.stage,
                         IntensifierStage.PROCESS_FIRST_CONFIG_RUN)
        # eval config 3 (= first run)
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=None,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )
        self.assertEqual(inc, self.config3)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        self.assertEqual(self.stats.inc_changed, 1)
        self.assertEqual(intensifier.n_iters,
                         1)  # 1 intensification run complete!

        # regular intensification begins - run incumbent
        # Normally a challenger would be given, which in this case is the
        # incumbent. But no more instances are available, so to prevent
        # cycles in which no iteration happens, provide the challengers
        intent, run_info = intensifier.get_next_run(
            challengers=[
                self.config2,
                self.config1,
            ],  # since incumbent is run, no configs required
            incumbent=inc,
            run_history=self.rh,
            chooser=None,
        )

        # no new TA runs as there are no more instances to run
        self.assertEqual(inc, self.config3)
        self.assertEqual(self.stats.inc_changed, 1)
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config3,
                                            only_max_observed_budget=True)),
            1,
        )
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)

        # run the challenger now that the incumbent has been executed
        # The call above already returned the challenger run, saving one iteration
        self.assertEqual(run_info.config, self.config2)
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=inc,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        # challenger has better performance, so the incumbent has changed
        self.assertEqual(inc, self.config2)
        self.assertEqual(self.stats.inc_changed, 2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT
                         )  # since there is no `always_race_against`
        self.assertFalse(intensifier.continue_challenger)
        self.assertEqual(
            intensifier.n_iters,
            1)  # iteration continues as `min_chall` condition is not met

        # intensification continues, running the incumbent again in the same
        # iteration... As above, no further instance-seed pairs are available
        # for the incumbent, so the call above already returned the next
        # config to run
        self.assertEqual(run_info.config, self.config2)

        # After the first run there is a transition
        # IntensifierStage.RUN_FIRST_CONFIG -> IntensifierStage.RUN_INCUMBENT,
        # because the incumbent is run next. A transition
        # IntensifierStage.RUN_INCUMBENT -> IntensifierStage.RUN_CHALLENGER
        # would follow if _add_inc_run found further available instance-seed
        # pairs; here there are none left, so the stage remains
        # IntensifierStage.RUN_INCUMBENT. Only once all <instance, seed> pairs
        # have been run is the challenger performance compared.
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)

        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=inc,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        # run challenger
        intent, run_info = intensifier.get_next_run(
            challengers=None,  # don't need a new list here as old one is cont'd
            incumbent=inc,
            run_history=self.rh,
            chooser=None,
        )
        self.assertEqual(run_info.config, self.config1)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=inc,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        self.assertEqual(inc, self.config1)
        self.assertEqual(self.stats.inc_changed, 3)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        self.assertEqual(intensifier.n_iters,
                         2)  # 2 intensification runs complete!
        # no configs should be left at the end
        with self.assertRaises(StopIteration):
            next(intensifier.configs_to_run)

        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config1,
                                            only_max_observed_budget=True)),
            1,
        )
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config2,
                                            only_max_observed_budget=True)),
            1,
        )
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config3,
                                            only_max_observed_budget=True)),
            1,
        )

    def test_eval_challenger_3(self):
        """
        test eval_challenger for a resumed SMAC run (first run with incumbent)
        """
        def target(x):
            return x["a"]

        taf = ExecuteTAFuncDict(use_pynisher=False,
                                ta=target,
                                stats=self.stats,
                                run_obj="quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            instances=[1],
            run_obj_time=False,
            deterministic=False,
            always_race_against=None,
            run_limit=1,
        )

        self.assertEqual(intensifier.n_iters, 0)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_FIRST_CONFIG)

        # adding run for incumbent configuration
        self.rh.add(
            config=self.config1,
            cost=1,
            time=1,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=None,
            additional_info=None,
        )

        # intensification - incumbent will be run, but not as RUN_FIRST_CONFIG stage
        intent_, run_info = intensifier.get_next_run(
            challengers=[self.config2],
            incumbent=self.config1,
            run_history=self.rh,
            chooser=None,
        )
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=self.config1,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )

        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config1,
                                            only_max_observed_budget=True)),
            2,
        )

    def test_no_new_intensification_wo_challenger_run(self):
        """
        This test ensures that no new iteration is started if no challenger run was conducted
        """
        def target(x):
            return 2 * x["a"] + x["b"]

        taf = ExecuteTAFuncDict(use_pynisher=False,
                                ta=target,
                                stats=self.stats,
                                run_obj="quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(
            stats=self.stats,
            traj_logger=TrajLogger(output_dir=None, stats=self.stats),
            rng=np.random.RandomState(12345),
            instances=[1],
            run_obj_time=False,
            deterministic=True,
            always_race_against=None,
            run_limit=1,
            min_chall=1,
        )

        self.assertEqual(intensifier.n_iters, 0)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_FIRST_CONFIG)

        intent, run_info = intensifier.get_next_run(
            challengers=[self.config3],
            incumbent=None,
            run_history=self.rh,
            chooser=None,
        )
        self.assertEqual(run_info.config, self.config3)
        self.assertEqual(intensifier.stage,
                         IntensifierStage.PROCESS_FIRST_CONFIG_RUN)
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=None,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )
        self.assertEqual(inc, self.config3)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        self.assertEqual(intensifier.n_iters,
                         1)  # 1 intensification run complete!

        # regular intensification begins - run incumbent

        # No further instance-seed pairs for the incumbent are available.
        # A None challenger is suggested and the code jumps to the next
        # iteration, causing a transition RUN_INCUMBENT -> RUN_CHALLENGER.
        # The next configuration to run would then be the incumbent itself,
        # which is not rerun (see log message:
        # "Challenger was the same as the current incumbent; Skipping challenger").
        # We then try to get more challengers, but the only challenger
        # provided below is config3, the incumbent, so nothing more can run
        intent, run_info = intensifier.get_next_run(
            challengers=[self.config3],  # since incumbent is run, no configs required
            incumbent=inc,
            run_history=self.rh,
            chooser=None,
        )

        self.assertEqual(run_info.config, None)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)

        intensifier._next_iteration()

        # Add a configuration, then try to execute it afterwards
        self.assertEqual(intensifier.n_iters, 2)

        self.rh.add(
            config=self.config1,
            cost=1,
            time=1,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=0,
            additional_info=None,
        )
        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # In the upcoming get_next_run call, the stage is RUN_CHALLENGER,
        # so the intensifier tries to run config1. Nevertheless,
        # no further instances are available for this configuration.
        # In this scenario, the intensifier produces a SKIP intent to indicate
        # that a new iteration must be initiated and, for code simplicity,
        # relies on a new call to get_next_run to yield more configurations
        intent, run_info = intensifier.get_next_run(challengers=[self.config1],
                                                    incumbent=inc,
                                                    run_history=self.rh,
                                                    chooser=None)
        self.assertEqual(intent, RunInfoIntent.SKIP)

        # This doesn't return a config because the list of configs is exhausted
        intensifier.stage = IntensifierStage.RUN_CHALLENGER
        config, _ = intensifier.get_next_challenger(challengers=None,
                                                    chooser=None)
        self.assertIsNone(config)
        # This finally yields a runnable configuration
        intent, run_info = intensifier.get_next_run(challengers=[self.config2],
                                                    incumbent=inc,
                                                    run_history=self.rh,
                                                    chooser=None)
        result = eval_challenger(run_info, taf, self.stats, self.rh)
        inc, perf = intensifier.process_results(
            run_info=run_info,
            incumbent=inc,
            run_history=self.rh,
            time_bound=np.inf,
            result=result,
        )
        # 3 iterations in total, due to the proactive calls
        # to get_next_challenger
        self.assertEqual(intensifier.n_iters, 3)
        self.assertEqual(intensifier.num_chall_run, 1)
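
    # A minimal sketch (not part of the original test) of how a caller is
    # expected to react to the SKIP intent demonstrated above; `intensifier`,
    # `inc`, and `self.rh` are assumed to be set up as in this test:
    #
    #     intent, run_info = intensifier.get_next_run(
    #         challengers=[self.config1], incumbent=inc,
    #         run_history=self.rh, chooser=None)
    #     if intent == RunInfoIntent.SKIP:
    #         # nothing runnable in this iteration; call get_next_run again
    #         # (possibly with fresh challengers) so the intensifier can
    #         # advance to the next stage/iteration
    #         intent, run_info = intensifier.get_next_run(
    #             challengers=[self.config2], incumbent=inc,
    #             run_history=self.rh, chooser=None)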
Beispiel #22
0
class TestIntensify(unittest.TestCase):
    def setUp(self):
        unittest.TestCase.setUp(self)

        self.rh = RunHistory(aggregate_func=average_cost)
        self.cs = get_config_space()
        self.config1 = Configuration(self.cs, values={'a': 0, 'b': 100})
        self.config2 = Configuration(self.cs, values={'a': 100, 'b': 0})
        self.config3 = Configuration(self.cs, values={'a': 100, 'b': 100})

        self.scen = Scenario({
            "cutoff_time": 2,
            'cs': self.cs,
            "output_dir": ""
        })
        self.stats = Stats(scenario=self.scen)
        self.stats.start_timing()

        self.logger = logging.getLogger("Test")

    def test_compare_configs_chall(self):
        '''
            challenger is better and has enough runs
        '''
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=None,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config2,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)

        # challenger has enough runs and is better
        self.assertEqual(conf, self.config2, "conf: %s" % (conf))

    def test_compare_configs_inc(self):
        '''
            incumbent is better
        '''
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=None,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config2,
                    cost=2,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)

        # challenger worse than inc
        self.assertEqual(conf, self.config1, "conf: %s" % (conf))

    def test_compare_configs_unknown(self):
        '''
            challenger is better but has fewer runs;
            -> no decision (None)
        '''
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=None,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=2,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=2,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)

        # challenger has fewer runs than the incumbent -> no decision
        self.assertIsNone(conf, "conf: %s" % (conf))

    def test_race_challenger(self):
        '''
           test _race_challenger without adaptive capping
        '''
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        self.assertEqual(inc, self.config2)

    def test_race_challenger_2(self):
        '''
           test _race_challenger with adaptive capping
        '''
        def target(x):
            time.sleep(1.5)
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats, run_obj="runtime")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=.001,
                    time=0.001,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=12345,
                    additional_info=None)

        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        self.assertEqual(inc, self.config1)
        self.assertLess(self.rh.get_cost(self.config2), 2,
                        self.rh.get_cost(self.config2))

        # get data for config2 to check that the correct run was performed
        run = self.rh.get_runs_for_config(self.config2)[0]
        config_id = self.rh.config_ids[self.config2]
        self.assertEqual(run.instance, 1, run.instance)
        self.assertEqual(run.seed, 12345, run.seed)

    def test_race_challenger_large(self):
        '''
           test _race_challenger using solution_quality
        '''
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(10)),
                                  deterministic=True)

        for i in range(10):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=12345,
                        additional_info=None)

        # tie on the first instance; afterwards the challenger should always
        # win and be returned as inc
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1,
                         self.rh.get_cost(self.config2))

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2)
        self.assertEqual(len(runs), 10)

    def test_race_challenger_large_blocked_seed(self):
        '''
           test _race_challenger whether seeds are blocked for challenger runs
        '''
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(10)),
                                  deterministic=False)

        for i in range(10):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)

        # tie on the first instance; afterwards the challenger should always
        # win and be returned as inc
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1,
                         self.rh.get_cost(self.config2))

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2)
        self.assertEqual(len(runs), 10)

        seeds = sorted([r.seed for r in runs])
        self.assertEqual(seeds, list(range(10)), seeds)

    def test_add_inc_run_det(self):
        '''
            test _add_inc_run()
        '''
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="solution_quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        # since we assume deterministic=True,
        # the second call should not add any more runs
        # given only one instance
        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

    def test_add_inc_run_nondet(self):
        '''
            test _add_inc_run()
        '''
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="solution_quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1, 2],
                                  deterministic=False)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 2, self.rh.data)
        runs = self.rh.get_runs_for_config(config=self.config1)
        # exactly one run on each instance
        self.assertIn(1, [runs[0].instance, runs[1].instance])
        self.assertIn(2, [runs[0].instance, runs[1].instance])

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 3, self.rh.data)

    def test_adaptive_capping(self):
        '''
            test _adapt_cutoff()
        '''
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(5)),
                                  deterministic=False)

        for i in range(5):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=i + 1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)
        for i in range(3):
            self.rh.add(config=self.config2,
                        cost=i + 1,
                        time=i + 1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)

        inst_seed_pairs = self.rh.get_runs_for_config(self.config1)
        # cost used by incumbent for going over all runs in inst_seed_pairs
        inc_sum_cost = sum_cost(config=self.config1,
                                instance_seed_pairs=inst_seed_pairs,
                                run_history=self.rh)

        cutoff = intensifier._adapt_cutoff(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           inc_sum_cost=inc_sum_cost)
        # inc_sum_cost * 1.2 - challenger cost sum = 15 * 1.2 - 6 = 12
        self.assertEqual(cutoff, 12)

        intensifier.cutoff = 5

        cutoff = intensifier._adapt_cutoff(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           inc_sum_cost=inc_sum_cost)
        # scenario cutoff
        self.assertEqual(cutoff, 5)
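
    # A minimal sketch (not part of the original test) of the arithmetic
    # behind the first cutoff assertion above, assuming a slack factor of 1.2:
    #
    #     inc_sum_cost   = 1 + 2 + 3 + 4 + 5  # all 5 incumbent runs -> 15
    #     chall_sum_cost = 1 + 2 + 3          # all 3 challenger runs -> 6
    #     cutoff = inc_sum_cost * 1.2 - chall_sum_cost  # 15 * 1.2 - 6 = 12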
Beispiel #23
0
class TestIntensify(unittest.TestCase):
    def setUp(self):
        unittest.TestCase.setUp(self)

        self.rh = RunHistory(aggregate_func=average_cost)
        self.cs = get_config_space()
        self.config1 = Configuration(self.cs, values={'a': 0, 'b': 100})
        self.config2 = Configuration(self.cs, values={'a': 100, 'b': 0})
        self.config3 = Configuration(self.cs, values={'a': 100, 'b': 100})

        self.scen = Scenario({
            "cutoff_time": 2,
            'cs': self.cs,
            "run_obj": 'runtime',
            "output_dir": ''
        })
        self.stats = Stats(scenario=self.scen)
        self.stats.start_timing()

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

    def test_compare_configs_no_joint_set(self):
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=None,
                                  instances=[1])

        for i in range(2):
            self.rh.add(config=self.config1,
                        cost=2,
                        time=2,
                        status=StatusType.SUCCESS,
                        instance_id=1,
                        seed=i,
                        additional_info=None)

        for i in range(2, 5):
            self.rh.add(config=self.config2,
                        cost=1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=1,
                        seed=i,
                        additional_info=None)

        # The instance-seed sets of incumbent and challenger are completely disjoint.
        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)
        self.assertIsNone(conf)

        # The incumbent still has one instance-seed pair left on which the
        # challenger has not been run yet.
        self.rh.add(config=self.config2,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=1,
                    additional_info=None)
        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)
        self.assertIsNone(conf)

    def test_compare_configs_chall(self):
        '''
            challenger is better
        '''
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=None,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config2,
                    cost=0,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)

        # challenger has enough runs and is better
        self.assertEqual(conf, self.config2, "conf: %s" % (conf))

    def test_compare_configs_inc(self):
        '''
            incumbent is better
        '''
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=None,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config2,
                    cost=2,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)

        # challenger worse than inc
        self.assertEqual(conf, self.config1, "conf: %s" % (conf))

    def test_compare_configs_unknown(self):
        '''
            challenger is better but has fewer runs;
            -> no decision (None)
        '''
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=None,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=2,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=2,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)

        # challenger has fewer runs than the incumbent -> no decision
        self.assertIsNone(conf, "conf: %s" % (conf))

    def test_race_challenger(self):
        '''
           test _race_challenger without adaptive capping
        '''
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        self.assertEqual(inc, self.config2)

    def test_race_challenger_2(self):
        '''
           test _race_challenger with adaptive capping
        '''
        def target(x):
            time.sleep(1.5)
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats, run_obj="runtime")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=.001,
                    time=0.001,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=12345,
                    additional_info=None)

        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        self.assertEqual(inc, self.config1)

    def test_race_challenger_3(self):
        '''
           test _race_challenger with adaptive capping on a previously capped configuration  
        '''
        def target(config: Configuration, seed: int, instance: str):
            if instance == 1:
                time.sleep(2.1)
            else:
                time.sleep(0.6)
            return (config['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="runtime",
                                par_factor=1)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  cutoff=2,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=0.5,
                    time=.5,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=12345,
                    additional_info=None)

        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)
        self.assertEqual(inc, self.config1)

        # further run for incumbent
        self.rh.add(config=self.config1,
                    cost=2,
                    time=2,
                    status=StatusType.TIMEOUT,
                    instance_id=2,
                    seed=12345,
                    additional_info=None)

        # give config2 a second chance
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        # the incumbent should still be config1, because config2 gets a
        # full timeout on instance 1, such that c(config1) = 1.25 and
        # c(config2) is close to 1.3
        self.assertEqual(inc, self.config1)
        # the capped run should not be counted in runs_per_config
        self.assertAlmostEqual(self.rh.runs_per_config[2], 2)

    def test_race_challenger_large(self):
        '''
           test _race_challenger using solution_quality
        '''
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(10)),
                                  deterministic=True)

        for i in range(10):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=12345,
                        additional_info=None)

        # tie on the first instance; afterwards the challenger should always
        # win and be returned as inc
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1,
                         self.rh.get_cost(self.config2))

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2)
        self.assertEqual(len(runs), 10)

    def test_race_challenger_large_blocked_seed(self):
        '''
           test _race_challenger whether seeds are blocked for challenger runs
        '''
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(10)),
                                  deterministic=False)

        for i in range(10):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)

        # tie on the first instance; afterwards the challenger should always
        # win and be returned as inc
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1,
                         self.rh.get_cost(self.config2))

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2)
        self.assertEqual(len(runs), 10)

        seeds = sorted([r.seed for r in runs])
        self.assertEqual(seeds, list(range(10)), seeds)

    def test_add_inc_run_det(self):
        '''
            test _add_inc_run()
        '''
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="solution_quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        # since we assume deterministic=True,
        # the second call should not add any more runs
        # given only one instance
        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

    def test_add_inc_run_nondet(self):
        '''
            test _add_inc_run()
        '''
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="solution_quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1, 2],
                                  deterministic=False)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 2, self.rh.data)
        runs = self.rh.get_runs_for_config(config=self.config1)
        # exactly one run on each instance
        self.assertIn(1, [runs[0].instance, runs[1].instance])
        self.assertIn(2, [runs[0].instance, runs[1].instance])

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 3, self.rh.data)

    def test_adaptive_capping(self):
        '''
            test _adapt_cutoff()
        '''
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(5)),
                                  deterministic=False)

        for i in range(5):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=i + 1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)
        for i in range(3):
            self.rh.add(config=self.config2,
                        cost=i + 1,
                        time=i + 1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)

        inst_seed_pairs = self.rh.get_runs_for_config(self.config1)
        # cost used by incumbent for going over all runs in inst_seed_pairs
        inc_sum_cost = sum_cost(config=self.config1,
                                instance_seed_pairs=inst_seed_pairs,
                                run_history=self.rh)

        cutoff = intensifier._adapt_cutoff(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           inc_sum_cost=inc_sum_cost)
        # inc_sum_cost * 1.2 - challenger cost sum = 15 * 1.2 - 6 = 12
        self.assertEqual(cutoff, 12)

        intensifier.cutoff = 5

        cutoff = intensifier._adapt_cutoff(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           inc_sum_cost=inc_sum_cost)
        # scenario cutoff
        self.assertEqual(cutoff, 5)
Beispiel #24
0
    def process_results(self,
                        run_info: RunInfo,
                        incumbent: typing.Optional[Configuration],
                        run_history: RunHistory,
                        time_bound: float,
                        result: RunValue,
                        log_traj: bool = True,
                        ) -> typing.Tuple[Configuration, float]:
        """
        The intensifier stage will be updated based on the results/status
        of a configuration execution.

        During intensification, the following can happen:
        -> Challenger raced against incumbent
        -> Also, during a challenger run, a capped exception
           can be triggered, where no racer post processing is needed
        -> A run on the incumbent for more confidence needs to
           be processed, IntensifierStage.PROCESS_INCUMBENT_RUN
        -> The first run results need to be processed
           (PROCESS_FIRST_CONFIG_RUN)

        At the end of any run, checks are done to move to a new iteration.

        Parameters
        ----------
        run_info : RunInfo
            A RunInfo containing the configuration that was evaluated
        incumbent : typing.Optional[Configuration]
            best configuration so far, None in 1st run
        run_history : RunHistory
            stores all runs we ran so far
        time_bound : float
            time in [sec] available to perform intensify
        result: RunValue
            Contains the result (status and other metadata) of exercising
            a challenger/incumbent.
        log_traj: bool
            whether to log changes of incumbents in trajectory

        Returns
        -------
        incumbent: Configuration
            current (maybe new) incumbent configuration
        inc_perf: float
            empirical performance of incumbent configuration
        """
        if self.stage == IntensifierStage.PROCESS_FIRST_CONFIG_RUN:
            if incumbent is None:
                self.logger.info("First run, no incumbent provided;"
                                 " challenger is assumed to be the incumbent")
                incumbent = run_info.config

        if self.stage in [
                IntensifierStage.PROCESS_INCUMBENT_RUN,
                IntensifierStage.PROCESS_FIRST_CONFIG_RUN
        ]:
            self._ta_time += result.time
            self.num_run += 1
            self._process_inc_run(
                incumbent=incumbent,
                run_history=run_history,
                log_traj=log_traj,
            )

        else:
            self.num_run += 1
            self.num_chall_run += 1
            if result.status == StatusType.CAPPED:
                # move on to the next iteration
                self.logger.debug("Challenger itensification timed out due "
                                  "to adaptive capping.")
                self.stage = IntensifierStage.RUN_INCUMBENT
            else:

                self._ta_time += result.time
                incumbent = self._process_racer_results(
                    challenger=run_info.config,
                    incumbent=incumbent,
                    run_history=run_history,
                    log_traj=log_traj,
                )

        self.elapsed_time += (result.endtime - result.starttime)
        # check if 1 intensification run is complete - line 18
        # this is different to regular SMAC as it requires at least one successful challenger run,
        # which is necessary to work on a fixed grid of configurations.
        if (self.stage == IntensifierStage.RUN_INCUMBENT
                and self._chall_indx >= self.min_chall
                and self.num_chall_run > 0):
            if self.num_run > self.run_limit:
                self.logger.info("Maximum #runs for intensification reached")
                self._next_iteration()

            if not self.use_ta_time_bound and self.elapsed_time - time_bound >= 0:
                self.logger.info(
                    "Wallclock time limit for intensification reached "
                    "(used: %f sec, available: %f sec)", self.elapsed_time,
                    time_bound)

                self._next_iteration()

            elif self._ta_time - time_bound >= 0:
                self.logger.info(
                    "TA time limit for intensification reached (used: %f sec, available: %f sec)",
                    self._ta_time, time_bound)

                self._next_iteration()

        inc_perf = run_history.get_cost(incumbent)

        return incumbent, inc_perf
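
The iteration checks at the end of this method combine a run limit with either a wallclock or a target-algorithm time budget. Below is a minimal standalone sketch of that decision rule; the function and its argument names are illustrative, mirroring the attributes used above, and are not part of the SMAC API:

def should_start_new_iteration(num_run, run_limit, elapsed_time, ta_time,
                               time_bound, use_ta_time_bound):
    # Illustrative re-statement of the three checks above
    if num_run > run_limit:
        return True  # maximum number of runs reached
    if not use_ta_time_bound and elapsed_time - time_bound >= 0:
        return True  # wallclock budget for this intensification exhausted
    elif ta_time - time_bound >= 0:
        return True  # target-algorithm time budget exhausted
    return False

assert should_start_new_iteration(5, 100, 12.0, 3.0, 10.0, False)   # wallclock
assert should_start_new_iteration(5, 100, 2.0, 11.0, 10.0, True)    # TA time
assert not should_start_new_iteration(5, 100, 2.0, 3.0, 10.0, False)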
Beispiel #25
0
class CAVE(object):
    """
    """
    def __init__(self,
                 folders: typing.List[str],
                 output: str,
                 ta_exec_dir: Union[str, None] = None,
                 missing_data_method: str = 'epm',
                 max_pimp_samples: int = -1,
                 fanova_pairwise=True):
        """
        Initialize CAVE facade to handle analyzing, plotting and building the
        report-page easily. During initialization, the analysis-infrastructure
        is built and the data is validated, meaning the overall best
        incumbent is found and default+incumbent are evaluated for all
        instances for all runs, by default using an EPM.
        The class holds two runhistories:
            self.original_rh -> only contains runs from the actual data
            self.validated_rh -> contains original runs and epm-predictions for
                                 all incumbents
        The analyze()-method performs an analysis and outputs a report.html.

        Parameters
        ----------
        folders: list<strings>
            paths to relevant SMAC runs
        output: string
            output for cave to write results (figures + report)
        ta_exec_dir: string
            execution directory for target algorithm (to find instance.txt, ..)
        missing_data_method: string
            from [validation, epm], how to estimate missing runs
        max_pimp_samples: int
            maximum number of samples used for parameter-importance analysis
        fanova_pairwise: bool
            whether to evaluate pairwise marginals in fANOVA
        """
        self.logger = logging.getLogger("cave.cavefacade")
        self.logger.debug("Folders: %s", str(folders))
        self.ta_exec_dir = ta_exec_dir

        # Create output if necessary
        self.output = output
        self.logger.info("Saving results to %s", self.output)
        if not os.path.exists(output):
            self.logger.debug("Output-dir %s does not exist, creating",
                              self.output)
            os.makedirs(output)
        if not os.path.exists(os.path.join(self.output, "debug")):
            os.makedirs(os.path.join(self.output, "debug"))
        # Log to file
        logger = logging.getLogger()
        handler = logging.FileHandler(
            os.path.join(self.output, "debug/debug.log"), "w")
        handler.setLevel(logging.DEBUG)
        logger.addHandler(handler)

        # Global runhistory combines all actual runs of individual SMAC-runs
        # We save the combined (unvalidated) runhistory to disk, so we can use it later on.
        # We keep the validated runhistory (with as many runs as possible) in
        # memory. The distinction is made to avoid using runs that are
        # only estimated using an EPM for further EPMs or to handle runs
        # validated on different hardware (depending on validation-method).
        self.original_rh = RunHistory(average_cost)
        self.validated_rh = RunHistory(average_cost)

        # Save all relevant SMAC-runs in a list
        self.runs = []
        for folder in folders:
            try:
                self.logger.debug("Collecting data from %s.", folder)
                self.runs.append(SMACrun(folder, ta_exec_dir))
            except Exception as err:
                self.logger.warning(
                    "Folder %s could not be loaded, failed "
                    "with error message: %s", folder, err)
                continue
        if not self.runs:
            raise ValueError(
                "None of the specified SMAC-folders could be loaded.")

        # Use scenario of first run for general purposes (expecting they are all the same anyway!)
        self.scenario = self.runs[0].solver.scenario

        # Update global runhistory with all available runhistories
        self.logger.debug("Update original rh with all available rhs!")
        runhistory_fns = [
            os.path.join(run.folder, "runhistory.json") for run in self.runs
        ]
        for rh_file in runhistory_fns:
            self.original_rh.update_from_json(rh_file, self.scenario.cs)
        self.logger.debug(
            'Combined number of Runhistory data points: %d. '
            '# Configurations: %d. # Runhistories: %d',
            len(self.original_rh.data),
            len(self.original_rh.get_all_configs()), len(runhistory_fns))
        self.original_rh.save_json(
            os.path.join(self.output, "combined_rh.json"))

        # Validator for a) validating with epm, b) plot over time
        # Initialize without trajectory
        self.validator = Validator(self.scenario, None, None)

        # Estimate missing costs for [def, inc1, inc2, ...]
        self.complete_data(method=missing_data_method)
        self.best_run = min(
            self.runs,
            key=lambda run: self.validated_rh.get_cost(run.solver.incumbent))

        self.default = self.scenario.cs.get_default_configuration()
        self.incumbent = self.best_run.solver.incumbent

        self.logger.debug("Overall best run: %s, with incumbent: %s",
                          self.best_run.folder, self.incumbent)

        # Following variable determines whether a distinction is made
        # between train and test-instances (e.g. in plotting)
        self.train_test = bool(self.scenario.train_insts != [None]
                               and self.scenario.test_insts != [None])

        self.analyzer = Analyzer(self.original_rh, self.validated_rh,
                                 self.default, self.incumbent, self.train_test,
                                 self.scenario, self.validator, self.output,
                                 max_pimp_samples, fanova_pairwise)

        self.builder = HTMLBuilder(self.output, "CAVE")
        # Builder for html-website
        self.website = OrderedDict([])

    def complete_data(self, method="epm"):
        """Complete missing data of runs to be analyzed. Either using validation
        or EPM.
        """
        with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
            self.logger.info("Completing data using %s.", method)

            path_for_validated_rhs = os.path.join(self.output, "validated_rhs")
            for run in self.runs:
                self.validator.traj = run.traj
                if method == "validation":
                    # TODO determine # repetitions
                    new_rh = self.validator.validate(
                        'def+inc',
                        'train+test',
                        1,
                        -1,
                        runhistory=self.original_rh)
                elif method == "epm":
                    new_rh = self.validator.validate_epm(
                        'def+inc',
                        'train+test',
                        1,
                        runhistory=self.original_rh)
                else:
                    raise ValueError("Missing data method illegal (%s)",
                                     method)
                self.validator.traj = None  # Avoid usage-mistakes
                self.validated_rh.update(new_rh)

    def analyze(self,
                performance=True,
                cdf=True,
                scatter=True,
                confviz=True,
                param_importance=['forward_selection', 'ablation', 'fanova'],
                feature_analysis=[
                    "box_violin", "correlation", "importance",
                    "clustering", "feature_cdf"
                ],
                parallel_coordinates=True,
                cost_over_time=True,
                algo_footprint=True):
        """Analyze the available data and build HTML-webpage as dict.
        Save webpage in 'self.output/CAVE/report.html'.
        Analyzing is performed with the analyzer-instance that is initialized in
        the __init__

        Parameters
        ----------
        performance: bool
            whether to calculate par10-values
        cdf: bool
            whether to plot cdf
        scatter: bool
            whether to plot scatter
        confviz: bool
            whether to perform configuration visualization
        param_importance: List[str]
            containing methods for parameter importance
        feature_analysis: List[str]
            containing methods for feature analysis
        parallel_coordinates: bool
            whether to plot parallel coordinates
        cost_over_time: bool
            whether to plot cost over time
        algo_footprint: bool
            whether to plot algorithm footprints
        """

        # Check arguments
        for p in param_importance:
            if p not in [
                    'forward_selection', 'ablation', 'fanova', 'incneighbor'
            ]:
                raise ValueError("%s not a valid option for parameter "
                                 "importance!" % p)
        for f in feature_analysis:
            if f not in [
                    "box_violin", "correlation", "importance", "clustering",
                    "feature_cdf"
            ]:
                raise ValueError("%s not a valid option for feature analysis!",
                                 f)

        # Start analysis
        overview = self.analyzer.create_overview_table(self.best_run.folder)
        self.website["Meta Data"] = {"table": overview}

        compare_config = self.analyzer.config_to_html(self.default,
                                                      self.incumbent)
        self.website["Best configuration"] = {"table": compare_config}

        ########## PERFORMANCE ANALYSIS
        self.website["Performance Analysis"] = OrderedDict()

        if performance:
            performance_table = self.analyzer.create_performance_table(
                self.default, self.incumbent)
            self.website["Performance Analysis"]["Performance Table"] = {
                "table": performance_table
            }

        if cdf:
            cdf_path = self.analyzer.plot_cdf()
            self.website["Performance Analysis"][
                "empirical Cumulative Distribution Function (eCDF)"] = {
                    "figure": cdf_path
                }

        if scatter and (self.scenario.train_insts != [None]):
            scatter_path = self.analyzer.plot_scatter()
            self.website["Performance Analysis"]["Scatterplot"] = {
                "figure": scatter_path
            }
        elif scatter:
            self.logger.info(
                "Scatter plot desired, but no instances available.")

        # Build report before time-consuming analysis
        self.build_website()

        if algo_footprint and self.scenario.feature_dict:
            algorithms = {self.default: "default", self.incumbent: "incumbent"}
            # Add all available incumbents to test portfolio strategy
            #for r in self.runs:
            #    if not r.get_incumbent() in algorithms:
            #        algorithms[r.get_incumbent()] = str(self.runs.index(r))

            algo_footprint_plots = self.analyzer.plot_algorithm_footprint(
                algorithms)
            self.website["Performance Analysis"][
                "Algorithm Footprints"] = OrderedDict()
            for p in algo_footprint_plots:
                header = os.path.splitext(os.path.split(p)[1])[0]  # algo name
                self.website["Performance Analysis"]["Algorithm Footprints"][
                    header] = {
                        "figure": p,
                        "tooltip":
                        get_tooltip("Algorithm Footprints") + ": " + header
                    }

        self.build_website()

        ########### Configurator's behavior
        self.website["Configurator's behavior"] = OrderedDict()

        if confviz:
            if self.scenario.feature_array is None:
                self.logger.info("Configuration visualization desired, but no "
                                 "instance-features available; using an empty "
                                 "feature array.")
                self.scenario.feature_array = np.array([[]])
            # Sort runhistories and incs wrt cost
            incumbents = [r.solver.incumbent for r in self.runs]
            trajectories = [r.traj for r in self.runs]
            runhistories = [r.runhistory for r in self.runs]
            costs = [self.validated_rh.get_cost(i) for i in incumbents]
            costs, incumbents, runhistories, trajectories = (
                list(t) for t in zip(
                    *sorted(zip(costs, incumbents, runhistories, trajectories),
                            key=lambda x: x[0])))
            incumbents = list(map(lambda x: x['incumbent'], trajectories[0]))

            confviz_script = self.analyzer.plot_confviz(
                incumbents, runhistories)
            self.website["Configurator's behavior"][
                "Configurator Footprint"] = {
                    "table": confviz_script
                }

        self.build_website()

        if cost_over_time:
            cost_over_time_path = self.analyzer.plot_cost_over_time(
                self.best_run.traj, self.validator)
            self.website["Configurator's behavior"]["Cost over time"] = {
                "figure": cost_over_time_path
            }

        self.build_website()

        self.parameter_importance(ablation='ablation' in param_importance,
                                  fanova='fanova' in param_importance,
                                  forward_selection='forward_selection'
                                  in param_importance,
                                  incneighbor='incneighbor'
                                  in param_importance)

        self.build_website()

        if parallel_coordinates:
            # Should be after parameter importance, if performed.
            n_params = 6
            parallel_path = self.analyzer.plot_parallel_coordinates(n_params)
            self.website["Configurator's behavior"]["Parallel Coordinates"] = {
                "figure": parallel_path
            }

        self.build_website()

        if self.scenario.feature_dict:
            self.feature_analysis(box_violin='box_violin' in feature_analysis,
                                  correlation='correlation'
                                  in feature_analysis,
                                  clustering='clustering' in feature_analysis,
                                  importance='importance' in feature_analysis)
        else:
            self.logger.info('No feature analysis possible')

        self.logger.info("CAVE finished. Report is located in %s",
                         os.path.join(self.output, 'report.html'))

        self.build_website()

    def parameter_importance(self,
                             ablation=False,
                             fanova=False,
                             forward_selection=False,
                             incneighbor=False):
        """Perform the specified parameter importance procedures. """
        # PARAMETER IMPORTANCE
        if (ablation or forward_selection or fanova or incneighbor):
            self.website["Parameter Importance"] = OrderedDict()
        sum_ = 0
        if fanova:
            sum_ += 1
            table, plots, pair_plots = self.analyzer.fanova(self.incumbent)

            self.website["Parameter Importance"]["fANOVA"] = OrderedDict()

            self.website["Parameter Importance"]["fANOVA"]["Importance"] = {
                "table": table
            }
            # Insert plots (the received plots is a dict, mapping param -> path)
            self.website["Parameter Importance"]["fANOVA"][
                "Marginals"] = OrderedDict([])
            for param, plot in plots.items():
                self.website["Parameter Importance"]["fANOVA"]["Marginals"][
                    param] = {
                        "figure": plot
                    }
            if pair_plots:
                self.website["Parameter Importance"]["fANOVA"][
                    "PairwiseMarginals"] = OrderedDict([])
                for param, plot in pair_plots.items():
                    self.website["Parameter Importance"]["fANOVA"][
                        "PairwiseMarginals"][param] = {
                            "figure": plot
                        }

        if ablation:
            sum_ += 1
            self.logger.info("Ablation...")
            self.analyzer.parameter_importance("ablation", self.incumbent,
                                               self.output)
            ablationpercentage_path = os.path.join(self.output,
                                                   "ablationpercentage.png")
            ablationperformance_path = os.path.join(self.output,
                                                    "ablationperformance.png")
            self.website["Parameter Importance"]["Ablation"] = {
                "figure": [ablationpercentage_path, ablationperformance_path]
            }

        if forward_selection:
            sum_ += 1
            self.logger.info("Forward Selection...")
            self.analyzer.parameter_importance("forward-selection",
                                               self.incumbent, self.output)
            f_s_barplot_path = os.path.join(self.output,
                                            "forward selection-barplot.png")
            f_s_chng_path = os.path.join(self.output,
                                         "forward selection-chng.png")
            self.website["Parameter Importance"]["Forward Selection"] = {
                "figure": [f_s_barplot_path, f_s_chng_path]
            }

        if incneighbor:
            sum_ += 1
            self.logger.info("Local EPM-predictions around incumbent...")
            plots = self.analyzer.local_epm_plots()
            self.website["Parameter Importance"][
                "Local Parameter Importance (LPI)"] = OrderedDict([])
            for param, plot in plots.items():
                self.website["Parameter Importance"][
                    "Local Parameter Importance (LPI)"][param] = {
                        "figure": plot
                    }

        if sum_:
            of = os.path.join(self.output, 'pimp.tex')
            self.logger.info('Creating pimp latex table at %s', of)
            self.analyzer.pimp.table_for_comparison(self.analyzer.evaluators,
                                                    of,
                                                    style='latex')

    def feature_analysis(self,
                         box_violin=False,
                         correlation=False,
                         clustering=False,
                         importance=False):
        if not (box_violin or correlation or clustering or importance):
            self.logger.debug("No feature analysis.")
            return

        # FEATURE ANALYSIS (ASAPY)
        # TODO make the following line prettier
        # TODO feat-names from scenario?
        in_reader = InputReader()
        feat_fn = self.scenario.feature_fn

        if not self.scenario.feature_names:
            with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
                if not feat_fn or not os.path.exists(feat_fn):
                    self.logger.warning(
                        "Feature Analysis needs valid feature "
                        "file! Either %s is not a valid "
                        "filename or features are not saved in "
                        "the scenario.", feat_fn)
                    self.logger.error("Skipping Feature Analysis.")
                    return
                else:
                    feat_names = in_reader.read_instance_features_file(
                        self.scenario.feature_fn)[0]
        else:
            feat_names = copy.deepcopy(self.scenario.feature_names)

        self.website["Feature Analysis"] = OrderedDict([])

        # feature importance using forward selection
        if importance:
            self.website["Feature Analysis"][
                "Feature Importance"] = OrderedDict()
            imp, plots = self.analyzer.feature_importance()
            imp = DataFrame(data=list(imp.values()),
                            index=list(imp.keys()),
                            columns=["Error"])
            imp = imp.to_html()  # this is a table with the values in html
            self.website["Feature Analysis"]["Feature Importance"]["Table"] = {
                "table": imp
            }
            for p in plots:
                name = os.path.splitext(os.path.basename(p))[0]
                self.website["Feature Analysis"]["Feature Importance"][
                    name] = {
                        "figure": p
                    }

        # box and violin plots
        if box_violin:
            name_plots = self.analyzer.feature_analysis(
                'box_violin', feat_names)
            self.website["Feature Analysis"][
                "Violin and Box Plots"] = OrderedDict()
            for plot_tuple in name_plots:
                key = "%s" % (plot_tuple[0])
                self.website["Feature Analysis"]["Violin and Box Plots"][
                    key] = {
                        "figure": plot_tuple[1]
                    }

        # correlation plot
        if correlation:
            correlation_plot = self.analyzer.feature_analysis(
                'correlation', feat_names)
            if correlation_plot:
                self.website["Feature Analysis"]["Correlation"] = {
                    "figure": correlation_plot
                }

        # cluster instances in feature space
        if clustering:
            cluster_plot = self.analyzer.feature_analysis(
                'clustering', feat_names)
            self.website["Feature Analysis"]["Clustering"] = {
                "figure": cluster_plot
            }

        self.build_website()

    def build_website(self):
        self.builder.generate_html(self.website)
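
A minimal usage sketch of the facade above; the folder paths are placeholders, and the keyword names are taken directly from the __init__ and analyze signatures shown (assuming CAVE has been imported from its package):

cave = CAVE(folders=["smac-output/run_1", "smac-output/run_2"],
            output="cave-report",
            ta_exec_dir=".",
            missing_data_method="epm")

# Restrict the report to the cheap analyses; parameter-importance and
# feature-analysis methods are opt-in via the respective lists
cave.analyze(param_importance=[], feature_analysis=[],
             parallel_coordinates=False, algo_footprint=False)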
Beispiel #26
0
    def _process_racer_results(
        self,
        challenger: Configuration,
        incumbent: Configuration,
        run_history: RunHistory,
        log_traj: bool = True,
    ) -> typing.Optional[Configuration]:
        """Process the result of a racing configuration against the
        current incumbent. Might propose a new incumbent.

        Parameters
        ----------
        challenger : Configuration
            Configuration which challenges incumbent
        incumbent : Configuration
            Best configuration so far
        run_history : RunHistory
            Stores all runs we ran so far
        log_traj : bool
            Whether to log changes of incumbents in trajectory
        Returns
        -------
        new_incumbent: typing.Optional[Configuration]
            Either challenger or incumbent
        """
        chal_runs = run_history.get_runs_for_config(
            challenger, only_max_observed_budget=True)
        chal_perf = run_history.get_cost(challenger)
        # if all <instance, seed> have been run, compare challenger performance
        if not self.to_run:
            new_incumbent = self._compare_configs(incumbent=incumbent,
                                                  challenger=challenger,
                                                  run_history=run_history,
                                                  log_traj=log_traj)

            # update intensification stage
            if new_incumbent == incumbent:
                # move on to the next iteration
                self.stage = IntensifierStage.RUN_INCUMBENT
                self.continue_challenger = False
                self.logger.debug(
                    'Estimated cost of challenger on %d runs: %.4f, but worse than incumbent',
                    len(chal_runs), chal_perf)

            elif new_incumbent == challenger:
                # New incumbent found
                incumbent = challenger
                self.continue_challenger = False
                # compare against basis configuration if provided, else go to next iteration
                if self.always_race_against and \
                        self.always_race_against != challenger:
                    self.stage = IntensifierStage.RUN_BASIS
                else:
                    self.stage = IntensifierStage.RUN_INCUMBENT
                self.logger.debug(
                    'Estimated cost of challenger on %d runs: %.4f, becomes new incumbent',
                    len(chal_runs), chal_perf)

            else:  # Line 17
                # challenger is not worse, continue
                self.N = 2 * self.N
                self.continue_challenger = True
                self.logger.debug(
                    'Estimated cost of challenger on %d runs: %.4f, adding %d runs to the queue',
                    len(chal_runs), chal_perf, self.N / 2)
        else:
            self.logger.debug(
                'Estimated cost of challenger on %d runs: %.4f, still %d runs to go (continue racing)',
                len(chal_runs), chal_perf, len(self.to_run))

        return incumbent
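
When the challenger survives a comparison (the else-branch above, Line 17), self.N is doubled, yielding the classic 1, 2, 4, ... schedule of additional challenger runs. A standalone illustration, independent of SMAC:

def race_schedule(start=1, max_runs=16):
    # How many challenger runs are scheduled per surviving racing round
    n = start
    while n <= max_runs:
        yield n
        n *= 2  # challenger was not worse: double the batch size

print(list(race_schedule()))  # [1, 2, 4, 8, 16]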
Beispiel #27
0
    def _race_challenger(self, challenger: Configuration,
                         incumbent: Configuration, run_history: RunHistory,
                         aggregate_func: typing.Callable):
        '''
            aggressively race challenger against incumbent

            Parameters
            ----------
            challenger : Configuration
                configuration which challenges incumbent
            incumbent : Configuration
                best configuration so far
            run_history : RunHistory
                stores all runs we ran so far
            aggregate_func: typing.Callable
                aggregate performance across instances

            Returns
            -------
            new_incumbent: Configuration
                either challenger or incumbent
        '''
        # at least one run of the challenger is performed,
        # which increases the chall_indx counter below
        inc_perf = run_history.get_cost(incumbent)

        learning_curve = []

        self._num_run += 1
        self._chall_indx += 1

        pc = None
        for epoch in range(self.max_epochs):
            status, cost, time, add_info = self.tae_runner.start(
                config=challenger,
                instance=None,
                seed=0,
                cutoff=2**32 - 1,
                instance_specific=None,
                pc=pc)
            try:
                pc = add_info["model"]
            except KeyError:  # model building failed, e.g. because of nan
                break

            learning_curve.append(cost)

            if len(self.learning_curves) > 10 and epoch > self.max_epochs / 4:
                seen_curves = np.array(self.learning_curves)[:, epoch]
                if cost > np.median(seen_curves):
                    self.logger.info("Abort run (%f vs %f)" %
                                     (cost, np.median(seen_curves)))
                    break

        # delete model in runhistory to be more memory efficient
        chall_id = run_history.config_ids[challenger]
        runkey = RunKey(chall_id, None, 0)
        runvalue = run_history.data[runkey]
        try:
            del runvalue.additional_info["model"]
        except KeyError:
            pass

        if epoch == self.max_epochs - 1:
            self.learning_curves.append(learning_curve)

        chal_perf = cost

        if cost < inc_perf:
            self.logger.info(
                "Challenger (%.4f) is better than incumbent (%.4f)" %
                (chal_perf, inc_perf))
            # Show changes in the configuration
            params = sorted([(param, incumbent[param], challenger[param])
                             for param in challenger.keys()])
            self.logger.info("Changes in incumbent:")
            for param in params:
                if param[1] != param[2]:
                    self.logger.info("  %s : %r -> %r" % (param))
                else:
                    self.logger.debug("  %s remains unchanged: %r" %
                                      (param[0], param[1]))
            incumbent = challenger
            self.stats.inc_changed += 1
            self.traj_logger.add_entry(train_perf=chal_perf,
                                       incumbent_id=self.stats.inc_changed,
                                       incumbent=challenger)
        else:
            self.logger.debug(
                "Incumbent (%.4f) is better than challenger (%.4f)" %
                (inc_perf, chal_perf))

        return incumbent
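
The early-stopping rule above aborts a challenger once at least a quarter of the epochs have passed and its cost is worse than the median of the previously recorded curves at the same epoch. A self-contained sketch of that check (pure numpy; names are illustrative, not part of SMAC):

import numpy as np

def should_abort(cost, learning_curves, epoch, max_epochs, min_curves=10):
    # Only judge once enough complete curves exist and the run has matured
    if len(learning_curves) <= min_curves or epoch <= max_epochs / 4:
        return False
    seen = np.array(learning_curves)[:, epoch]
    return cost > np.median(seen)

curves = [[1.0 - 0.01 * e for e in range(100)] for _ in range(20)]
print(should_abort(cost=0.9, learning_curves=curves, epoch=50, max_epochs=100))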
Beispiel #28
0
    def intensify(self,
                  challengers: typing.List[Configuration],
                  incumbent: Configuration,
                  run_history: RunHistory,
                  aggregate_func: typing.Callable,
                  time_bound: int = MAXINT):
        '''
            running intensification to determine the incumbent configuration
            Side effect: adds runs to run_history

            Implementation of Procedure 2 in Hutter et al. (2011).

            Parameters
            ----------

            challengers : typing.List[Configuration]
                promising configurations
            incumbent : Configuration
                best configuration so far
            run_history : RunHistory
                stores all runs we ran so far
            aggregate_func: typing.Callable
                aggregate performance across instances
            time_bound : int, optional (default=2 ** 31 - 1)
                time in [sec] available to perform intensify

            Returns
            -------
            incumbent: Configuration()
                current (maybe new) incumbent configuration
            inc_perf: float
                empirical performance of incumbent configuration
        '''

        self.start_time = time.time()

        if time_bound < 0.01:
            raise ValueError("time_bound must be >= 0.01")

        self._num_run = 0
        self._chall_indx = 0

        # Line 1 + 2
        for challenger in challengers:
            if challenger == incumbent:
                self.logger.warning(
                    "Challenger was the same as the current incumbent; Skipping challenger"
                )
                continue

            self.logger.debug("Intensify on %s", challenger)
            if hasattr(challenger, 'origin'):
                self.logger.debug("Configuration origin: %s",
                                  challenger.origin)

            try:
                # Lines 3-7
                self._add_inc_run(incumbent=incumbent, run_history=run_history)

                # Lines 8-17
                incumbent = self._race_challenger(
                    challenger=challenger,
                    incumbent=incumbent,
                    run_history=run_history,
                    aggregate_func=aggregate_func)
            except BudgetExhaustedException:
                # We return incumbent, SMBO stops due to its own budget checks
                inc_perf = run_history.get_cost(incumbent)
                self.logger.debug("Budget exhausted; Return incumbent")
                return incumbent, inc_perf

            if self._chall_indx > 1 and self._num_run > self.run_limit:
                self.logger.debug("Maximum #runs for intensification reached")
                break
            elif (self._chall_indx > 1 and
                  time.time() - self.start_time - time_bound >= 0):
                self.logger.debug("Timelimit for intensification reached ("
                                  "used: %f sec, available: %f sec)" %
                                  (time.time() - self.start_time, time_bound))
                break

        # output estimated performance of incumbent
        inc_runs = run_history.get_runs_for_config(incumbent)
        inc_perf = aggregate_func(incumbent, run_history, inc_runs)
        self.logger.info(
            "Updated estimated performance of incumbent on %d runs: %.4f" %
            (len(inc_runs), inc_perf))

        self.stats.update_average_configs_per_intensify(
            n_configs=self._chall_indx)

        return incumbent, inc_perf
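
A sketch of how this racing loop is typically driven from the surrounding optimizer; every argument is assumed to come from that loop (e.g. aggregate_func would be the average_cost used elsewhere in these examples), so this is a placeholder driver, not part of the API:

def run_one_intensification(intensifier, challengers, incumbent, runhistory,
                            aggregate_func, time_spent_choosing):
    # All arguments come from the surrounding SMBO loop (placeholders here)
    return intensifier.intensify(
        challengers=challengers,        # e.g. sorted by acquisition value
        incumbent=incumbent,
        run_history=runhistory,
        aggregate_func=aggregate_func,  # e.g. average_cost
        time_bound=max(0.01, time_spent_choosing))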
Beispiel #29
0
class SMBO(BaseSolver):
    def __init__(self,
                 scenario,
                 tae_runner=None,
                 acquisition_function=None,
                 model=None,
                 runhistory2epm=None,
                 stats=None,
                 rng=None):
        '''
        Interface that contains the main Bayesian optimization loop

        Parameters
        ----------
        scenario: smac.scenario.scenario.Scenario
            Scenario object
        tae_runner: object
            object that implements the following method to call the target
            algorithm (or any other arbitrary function):
            run(self, config)
            If not set, it will be initialized with the tae.ExecuteTARunOld()
        acquisition_function : AcquisitionFunction
            Object that implements the AbstractAcquisitionFunction. Will use
            EI if not set.
        model : object
            Model that implements train() and predict(). Will use a
            RandomForest if not set.
        runhistory2epm : RunHistory2EMP
            Object that implements the AbstractRunHistory2EPM. If None,
            will use RunHistory2EPM4Cost if objective is cost or
            RunHistory2EPM4LogCost if objective is runtime.
        stats: Stats
            optional stats object
        rng: numpy.random.RandomState
            Random number generator
        '''

        if stats:
            self.stats = stats
        else:
            self.stats = Stats(scenario)

        self.runhistory = RunHistory()

        self.logger = logging.getLogger("smbo")

        if rng is None:
            self.num_run = np.random.randint(1234567980)
            self.rng = np.random.RandomState(seed=self.num_run)
        elif isinstance(rng, int):
            self.num_run = rng
            self.rng = np.random.RandomState(seed=rng)
        elif isinstance(rng, np.random.RandomState):
            self.num_run = rng.randint(1234567980)
            self.rng = rng
        else:
            raise TypeError('Unknown type %s for argument rng. Only accepts '
                            'None, int or np.random.RandomState' %
                            str(type(rng)))

        self.scenario = scenario
        self.config_space = scenario.cs
        self.traj_logger = TrajLogger(output_dir=self.scenario.output_dir,
                                      stats=self.stats)

        self.types = get_types(self.config_space, scenario.feature_array)
        if model is None:
            self.model = RandomForestWithInstances(
                self.types,
                scenario.feature_array,
                seed=self.rng.randint(1234567980))
        else:
            self.model = model

        if acquisition_function is None:
            self.acquisition_func = EI(self.model)
        else:
            self.acquisition_func = acquisition_function

        self.local_search = LocalSearch(self.acquisition_func,
                                        self.config_space)
        self.incumbent = None

        if tae_runner is None:
            self.executor = ExecuteTARunOld(ta=scenario.ta,
                                            stats=self.stats,
                                            run_obj=scenario.run_obj,
                                            par_factor=scenario.par_factor)
        else:
            self.executor = tae_runner

        self.inten = Intensifier(
            executor=self.executor,
            stats=self.stats,
            traj_logger=self.traj_logger,
            instances=self.scenario.train_insts,
            cutoff=self.scenario.cutoff,
            deterministic=self.scenario.deterministic,
            run_obj_time=self.scenario.run_obj == "runtime",
            instance_specifics=self.scenario.instance_specific)

        num_params = len(self.config_space.get_hyperparameters())

        self.objective = average_cost
        if self.scenario.run_obj == "runtime":

            if runhistory2epm is None:
                # if we log the performance data,
                # the RFRImputator will already get
                # log transform data from the runhistory
                cutoff = np.log10(self.scenario.cutoff)
                threshold = np.log10(self.scenario.cutoff *
                                     self.scenario.par_factor)

                imputor = RFRImputator(cs=self.config_space,
                                       rs=self.rng,
                                       cutoff=cutoff,
                                       threshold=threshold,
                                       model=self.model,
                                       change_threshold=0.01,
                                       max_iter=10)
                self.rh2EPM = RunHistory2EPM4LogCost(scenario=self.scenario,
                                                     num_params=num_params,
                                                     success_states=[
                                                         StatusType.SUCCESS,
                                                     ],
                                                     impute_censored_data=True,
                                                     impute_state=[
                                                         StatusType.TIMEOUT,
                                                     ],
                                                     imputor=imputor)
            else:
                self.rh2EPM = runhistory2epm

        elif self.scenario.run_obj == 'quality':
            if runhistory2epm is None:
                self.rh2EPM = RunHistory2EPM4Cost(
                    scenario=self.scenario,
                    num_params=num_params,
                    success_states=[StatusType.SUCCESS, ],
                    impute_censored_data=False,
                    impute_state=None)
            else:
                self.rh2EPM = runhistory2epm

        else:
            raise ValueError('Unknown run objective: %s. Should be either '
                             'quality or runtime.' % self.scenario.run_obj)

    def run_initial_design(self):
        '''
            runs target algorithm runs for an initial design;
            default implementation: running the default configuration on
                                    a random instance-seed pair
            Side effect: adds runs to self.runhistory

            Returns
            -------
            incumbent: Configuration()
                initial incumbent configuration
        '''

        default_conf = self.config_space.get_default_configuration()
        self.incumbent = default_conf

        # add this incumbent right away to have an entry to time point 0
        self.traj_logger.add_entry(train_perf=2**31,
                                   incumbent_id=1,
                                   incumbent=self.incumbent)

        rand_inst_id = self.rng.randint(0, len(self.scenario.train_insts))
        # ignore instance specific values
        rand_inst = self.scenario.train_insts[rand_inst_id]

        if self.scenario.deterministic:
            initial_seed = 0
        else:
            initial_seed = random.randint(0, MAXINT)

        status, cost, runtime, additional_info = self.executor.start(
            default_conf,
            instance=rand_inst,
            cutoff=self.scenario.cutoff,
            seed=initial_seed,
            instance_specific=self.scenario.instance_specific.get(
                rand_inst, "0"))

        if status in [StatusType.CRASHED, StatusType.ABORT]:
            self.logger.critical("First run crashed -- Abort")
            sys.exit(1)

        self.runhistory.add(config=default_conf,
                            cost=cost,
                            time=runtime,
                            status=status,
                            instance_id=rand_inst,
                            seed=initial_seed,
                            additional_info=additional_info)
        default_inst_seeds = set(
            self.runhistory.get_runs_for_config(default_conf))
        default_perf = self.objective(default_conf, self.runhistory,
                                      default_inst_seeds)
        self.runhistory.update_cost(default_conf, default_perf)

        self.stats.inc_changed += 1  # first incumbent

        self.traj_logger.add_entry(train_perf=default_perf,
                                   incumbent_id=self.stats.inc_changed,
                                   incumbent=self.incumbent)

        return default_conf

    def run(self, max_iters=10):
        '''
        Runs the Bayesian optimization loop for max_iters iterations

        Parameters
        ----------
        max_iters: int
            The maximum number of iterations

        Returns
        -------
        incumbent: Configuration()
            The best found configuration
        '''
        self.stats.start_timing()

        self.incumbent = self.run_initial_design()

        # Main BO loop
        iteration = 1
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_directory=self.scenario.output_dir,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            time_spent = time.time() - start_time
            self.logger.debug(
                "Time spent to choose next configurations: %.2f sec",
                time_spent)

            self.logger.debug("Intensify")

            self.incumbent, inc_perf = self.inten.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                objective=self.objective,
                time_bound=max(0.01, time_spent))

            # TODO: Write run history into database
            if self.scenario.shared_model:
                pSMAC.write(run_history=self.runhistory,
                            output_directory=self.scenario.output_dir,
                            num_run=self.num_run)

            if iteration == max_iters:
                break

            iteration += 1

            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent

    def choose_next(self,
                    X,
                    Y,
                    num_interleaved_random=1010,
                    num_configurations_by_random_search_sorted=1000,
                    num_configurations_by_local_search=10):
        """Choose next candidate solution with Bayesian optimization.

        Parameters
        ----------
        X : (N, D) numpy array
            Each row contains a configuration and one set of
            instance features.
        Y : (N, O) numpy array
            The function values for each configuration instance pair.

        Returns
        -------
        list
            List of 2020 suggested configurations to evaluate.
        """
        self.model.train(X, Y)

        if self.runhistory.empty():
            incumbent_value = 0.0
        elif self.incumbent is None:
            # TODO try to calculate an incumbent from the runhistory!
            incumbent_value = 0.0
        else:
            incumbent_value = self.runhistory.get_cost(self.incumbent)

        self.acquisition_func.update(model=self.model, eta=incumbent_value)

        # Remove dummy acquisition function value
        next_configs_by_random_search = [
            x[1] for x in self._get_next_by_random_search(
                num_points=num_interleaved_random)
        ]

        # Get configurations sorted by EI
        next_configs_by_random_search_sorted = \
            self._get_next_by_random_search(
                num_configurations_by_random_search_sorted, _sorted=True)
        next_configs_by_local_search = \
            self._get_next_by_local_search(num_configurations_by_local_search)

        next_configs_by_acq_value = next_configs_by_random_search_sorted + \
            next_configs_by_local_search
        next_configs_by_acq_value.sort(reverse=True, key=lambda x: x[0])
        self.logger.debug(
            "First 10 acq func values of selected configurations: %s" %
            (str([_[0] for _ in next_configs_by_acq_value[:10]])))
        next_configs_by_acq_value = [_[1] for _ in next_configs_by_acq_value]

        challengers = list(
            itertools.chain(*zip(next_configs_by_acq_value,
                                 next_configs_by_random_search)))
        return challengers

    def _get_next_by_random_search(self, num_points=1000, _sorted=False):
        """Get candidate solutions via random search.

        Parameters
        ----------
        num_points : int, optional (default=1000)
            Number of configurations to sample at random.

        _sorted : bool, optional (default=False)
            Whether to sort the candidate solutions by acquisition function
            value.

        Returns
        -------
        list : (acquisition value, Candidate solutions)
        """

        rand_configs = self.config_space.sample_configuration(size=num_points)
        if _sorted:
            imputed_rand_configs = map(ConfigSpace.util.impute_inactive_values,
                                       rand_configs)
            imputed_rand_configs = [
                x.get_array() for x in imputed_rand_configs
            ]
            imputed_rand_configs = np.array(imputed_rand_configs,
                                            dtype=np.float64)
            acq_values = self.acquisition_func(imputed_rand_configs)
            # From here
            # http://stackoverflow.com/questions/20197990/how-to-make-argsort-result-to-be-random-between-equal-values
            random = self.rng.rand(len(acq_values))
            # Last column is primary sort key!
            indices = np.lexsort((random.flatten(), acq_values.flatten()))

            for i in range(len(rand_configs)):
                rand_configs[i].origin = 'Random Search (sorted)'

            # Cannot use zip here because the indices array cannot index the
            # rand_configs list, because the second is a pure python list
            return [(acq_values[ind][0], rand_configs[ind])
                    for ind in indices[::-1]]
        else:
            for i in range(len(rand_configs)):
                rand_configs[i].origin = 'Random Search'
            return [(0, rand_configs[i]) for i in range(len(rand_configs))]

    def _get_next_by_local_search(self, num_points=10):
        """Get candidate solutions via local search.

        In case acquisition function values tie, these will be broken randomly.

        Parameters
        ----------
        num_points : int, optional (default=10)
            Number of local searches and returned values.

        Returns
        -------
        list : (acquisition value, Candidate solutions),
               ordered by their acquisition function value
        """
        configs_acq = []

        # Start N local search from different random start points
        for i in range(num_points):
            if i == 0 and self.incumbent is not None:
                start_point = self.incumbent
            else:
                start_point = self.config_space.sample_configuration()

            configuration, acq_val = self.local_search.maximize(start_point)

            configuration.origin = 'Local Search'
            configs_acq.append((acq_val[0][0], configuration))

        # shuffle for random tie-break; random.shuffle dropped its second
        # argument in Python 3.11, so shuffle via the RandomState directly
        self.rng.shuffle(configs_acq)

        # sort according to acq value
        # and return n best configurations
        configs_acq.sort(reverse=True, key=lambda x: x[0])

        return configs_acq
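
Two idioms from this SMBO example are easy to miss: choose_next interleaves the acquisition-sorted and the purely random configurations via itertools.chain(*zip(...)), and _get_next_by_random_search breaks acquisition-value ties randomly by handing np.lexsort a random secondary key (the last key passed is the primary one). A standalone demonstration of both, with toy values:

import itertools
import numpy as np

by_acq = ["a1", "a2", "a3"]
by_random = ["r1", "r2", "r3"]
# Alternate acquisition-sorted and random configurations
challengers = list(itertools.chain(*zip(by_acq, by_random)))
print(challengers)  # ['a1', 'r1', 'a2', 'r2', 'a3', 'r3']

acq_values = np.array([0.3, 0.5, 0.3, 0.5])
rng = np.random.RandomState(1)
tie_break = rng.rand(len(acq_values))
# Last key is primary: sort by acq value, break ties with random noise,
# then reverse for descending order
indices = np.lexsort((tie_break, acq_values))[::-1]
print(indices)  # the two 0.5-entries come first, in random order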
Beispiel #30
0
    def __init__(
        self,
        original_rh: RunHistory,
        validated_rh: RunHistory,
        validator: Validator,
        scenario: Scenario,
        default: Configuration,
        incumbent: Configuration,
        param_imp: Union[None, Dict[str, float]],
        params: Union[int, List[str]],
        n_configs: int,
        pc_sort_by: str,
        output_dir: str,
        cs: ConfigurationSpace,
        runtime: bool = False,
        max_runs_epm: int = 3000000,
    ):
        """This function prepares the data from a SMAC-related
        format (using runhistories and parameters) to a more general format
        (using a dataframe). The resulting dataframe is passed to the
        parallel_coordinates-routine

        Parameters
        ----------
        original_rh: RunHistory
            runhistory that should contain only runs that were executed during search
        validated_rh: RunHistory
            runhistory that may contain as many runs as possible, also external runs.
            this runhistory will be used to build the EPM
        validator: Validator
            validator to be used to estimate costs for configurations
        scenario: Scenario
            scenario object to take instances from
        default, incumbent: Configuration
            default and incumbent, they will surely be displayed
        param_imp: Union[None, Dict[str, float]]
            if given, maps parameter-names to importance
        params: Union[int, List[str]]
            either directly the parameters to be displayed or the number of
            parameters (will try to define the most important ones)
        n_configs: int
            number of configs to be plotted
        pc_sort_by: str
            defines the pimp-method by which to choose the plotted parameters
        max_runs_epm: int
            maximum number of runs to train the epm with. this should prevent MemoryErrors
        output_dir: str
            output directory for plots
        cs: ConfigurationSpace
            parameter configuration space to be visualized
        runtime: boolean
            runtime will be on logscale
        """

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)
        self.error = None

        self.default = default
        self.param_imp = param_imp
        self.cs = cs

        # Sorting by importance, if possible (choose first executed parameter-importance)
        self.method, self.importance = "", {}
        if pc_sort_by == 'all':
            self.logger.debug("Sorting by average importance")
            self.method = 'average'
            for m, i in self.param_imp.items():
                if i:
                    for p, imp in i.items():
                        if p in self.importance:
                            self.importance[p].append(imp)
                        else:
                            self.importance[p] = [imp]
            self.importance = {
                k: sum(v) / len(v)
                for k, v in self.importance.items()
            }
        elif pc_sort_by in self.param_imp:
            self.method, self.importance = pc_sort_by, self.param_imp[
                pc_sort_by]
        else:
            self.logger.debug("%s not evaluated.. choosing at random from: %s",
                              pc_sort_by, str(list(self.param_imp.keys())))
            for m, i in self.param_imp.items():
                if i:
                    self.method, self.importance = m, i
                    break

        self.hp_names = sorted(
            self.cs.get_hyperparameter_names(),
            key=lambda x: self.importance.get(x, 0),
            reverse=True)
        self.logger.debug("Sorted hp's by method \'%s\': %s", self.method,
                          str(self.hp_names))

        # To be set
        self.plots = []

        # Define set of configurations (limiting to max and choosing most interesting ones)
        all_configs = original_rh.get_all_configs()
        # max_runs_epm limits the total number of runs considered for the epm,
        # which in turn limits the maximum possible number of configs
        max_configs = int(
            max_runs_epm /
            (len(scenario.train_insts) + len(scenario.test_insts)))
        if len(all_configs) > max_configs:
            self.logger.debug(
                "Limiting number of configs to train epm from %d to %d (based on max runs %d) and choosing "
                "the ones with the most runs (for parallel coordinates)",
                len(all_configs), max_configs, max_runs_epm)
            all_configs = sorted(
                all_configs,
                key=lambda c: len(original_rh.get_runs_for_config(c)
                                  ))[:max_configs]
            if default not in all_configs:
                all_configs = [default] + all_configs
            if incumbent not in all_configs:
                all_configs.append(incumbent)

        # Get costs for those configurations
        epm_rh = RunHistory(average_cost)
        epm_rh.update(validated_rh)
        if scenario.feature_dict:  # if instances are available
            epm_rh.update(
                timing(validator.validate_epm)(all_configs,
                                               'train+test',
                                               1,
                                               runhistory=validated_rh))
        self.config_to_cost = {c: epm_rh.get_cost(c) for c in all_configs}

        self.params = self.get_params(params)
        self.n_configs = n_configs

        self.pcp = ParallelCoordinatesPlotter(self.config_to_cost, output_dir,
                                              cs, runtime)

    def _preprocess_budget(
        self,
        original_rh: RunHistory,
        validated_rh: RunHistory,
        validator: Validator,
        scenario: Scenario,
        default: Configuration,
        incumbent: Configuration,
        param_imp: Union[None, Dict[str, float]],
        output_dir: str,
        cs: ConfigurationSpace,
        runtime: bool = False,
    ):
        """
        Preprocess data and save in self.data to enable fast replots

        Parameters
        ----------
        original_rh: RunHistory
            runhistory that should contain only runs that were executed during search
        validated_rh: RunHistory
            runhistory that may contain as many runs as possible, also external runs.
            this runhistory will be used to build the EPM
        validator: Validator
            validator to be used to estimate costs for configurations
        scenario: Scenario
            scenario object to take instances from
        default, incumbent: Configuration
            default and incumbent, they will surely be displayed
        param_imp: Union[None, Dict[str, float]]
            if given, maps parameter-names to importance
        output_dir: str
            output directory for plots
        cs: ConfigurationSpace
            parameter configuration space to be visualized
        runtime: boolean
            runtime will be on logscale
        """
        # Sorting parameters by importance, if possible (choose first executed parameter-importance)
        method, importance = "", {}
        if self.pc_sort_by == 'all':
            self.logger.debug("Sorting by average importance")
            method = 'average'
            for m, i in param_imp.items():
                if i:
                    for p, imp in i.items():
                        if p in importance:
                            importance[p].append(imp)
                        else:
                            importance[p] = [imp]
            importance = {k: sum(v) / len(v) for k, v in importance.items()}
        elif self.pc_sort_by in param_imp:
            method, importance = self.pc_sort_by, param_imp[self.pc_sort_by]
        else:
            self.logger.debug("%s not evaluated.. choosing at random from: %s",
                              self.pc_sort_by, str(list(param_imp.keys())))
            for m, i in param_imp.items():
                if i:
                    method, importance = m, i
                    self.logger.debug("Chose %s", method)
                    break

        hp_names = sorted(cs.get_hyperparameter_names(),
                          key=lambda x: importance.get(x, 0),
                          reverse=True)
        self.logger.debug("Sorted hyperparameters by method \'%s\': %s",
                          method, str(hp_names))

        # Define set of configurations (limiting to max and choosing most interesting ones)
        all_configs = original_rh.get_all_configs()
        # max_runs_epm is the maximum total number of runs considered for epm to limit maximum possible number configs
        max_configs = int(
            self.max_runs_epm /
            (len(scenario.train_insts) + len(scenario.test_insts)))
        if len(all_configs) > max_configs:
            self.logger.debug(
                "Limiting number of configs to train epm from %d to %d (based on max runs %d) and "
                "choosing the ones with the most runs (for parallel coordinates)",
                len(all_configs), max_configs, self.max_runs_epm)
            all_configs = sorted(all_configs,
                                 key=lambda c: len(
                                     original_rh.get_runs_for_config(
                                         c, only_max_observed_budget=False)))
            all_configs = all_configs[:max_configs]
            if default not in all_configs:
                all_configs = [default] + all_configs
            if incumbent not in all_configs:
                all_configs.append(incumbent)

        # Get costs for those configurations
        epm_rh = RunHistory()
        epm_rh.update(validated_rh)
        if scenario.feature_dict:  # if instances are available
            epm_rh.update(
                timing(validator.validate_epm)(all_configs,
                                               'train+test',
                                               1,
                                               runhistory=validated_rh))
        config_to_cost = OrderedDict(
            {c: epm_rh.get_cost(c)
             for c in all_configs})

        data = OrderedDict()
        data['cost'] = list(config_to_cost.values())
        for hp in self.runscontainer.scenario.cs.get_hyperparameter_names():
            data[hp] = np.array([
                c[hp]  # if hp in c.get_dictionary() and not isinstance(c[hp], str) else np.nan
                for c in config_to_cost.keys()
            ])
        df = pd.DataFrame(data=data)
        return df
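
The frame returned above has one row per configuration, with a leading 'cost' column followed by one column per hyperparameter; this is the layout the parallel-coordinates plotter consumes. A toy construction with the same shape (hypothetical hyperparameters and costs, no CAVE dependencies):

from collections import OrderedDict

import numpy as np
import pandas as pd

configs = [{"lr": 0.10, "depth": 3}, {"lr": 0.01, "depth": 5}]
costs = [0.42, 0.37]

data = OrderedDict()
data["cost"] = costs
for hp in ["lr", "depth"]:
    data[hp] = np.array([c[hp] for c in configs])

df = pd.DataFrame(data=data)
print(df)
#    cost    lr  depth
# 0  0.42  0.10      3
# 1  0.37  0.01      5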