Example #1
0
    def _get_instances_to_run(
        self,
        challenger: Configuration,
        incumbent: Configuration,
        N: int,
        run_history: RunHistory,
    ) -> typing.Tuple[typing.List[InstSeedBudgetKey], float]:
        """
        Returns the minimum list of <instance, seed> pairs to run the challenger on
        before comparing it with the incumbent

        Parameters
        ----------
        challenger: Configuration
            promising configuration that is presently being evaluated
        incumbent: Configuration
            incumbent configuration
        N: int
            number of <instance, seed> pairs to select
        run_history: RunHistory
            Stores all runs we ran so far

        Returns
        -------
        typing.List[InstSeedBudgetKey]
            list of <instance, seed, budget> tuples to run
        float
            total (runtime) cost of running the incumbent on the instances (used for adaptive capping while racing)
        """
        # get next instances left for the challenger
        # Line 8
        inc_inst_seeds = set(
            run_history.get_runs_for_config(incumbent,
                                            only_max_observed_budget=True))
        chall_inst_seeds = set(
            run_history.get_runs_for_config(challenger,
                                            only_max_observed_budget=True))
        # Line 10
        missing_runs = sorted(inc_inst_seeds - chall_inst_seeds)

        # Line 11
        self.rs.shuffle(missing_runs)
        if N < 0:
            raise ValueError(
                'Argument N must not be smaller than zero, but is %s' % str(N))
        to_run = missing_runs[:min(N, len(missing_runs))]
        missing_runs = missing_runs[min(N, len(missing_runs)):]

        # for adaptive capping:
        # computed here (rather than on demand) for efficiency
        inst_seed_pairs = list(inc_inst_seeds - set(missing_runs))
        # cost used by incumbent for going over all runs in inst_seed_pairs
        inc_sum_cost = run_history.sum_cost(
            config=incumbent,
            instance_seed_budget_keys=inst_seed_pairs,
        )

        return to_run, inc_sum_cost
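# A minimal standalone sketch of the selection step above (Lines 10-11 of the
# intensification procedure), assuming plain (instance, seed) tuples instead of
# SMAC's InstSeedBudgetKey objects; select_missing_runs is a hypothetical helper.
import random

def select_missing_runs(inc_runs, chall_runs, n, rng=None):
    rng = rng or random.Random(1)
    # runs the incumbent has but the challenger is still missing
    missing = sorted(set(inc_runs) - set(chall_runs))
    # shuffle and schedule at most n of them for the challenger
    rng.shuffle(missing)
    return missing[:min(n, len(missing))]

# The challenger still misses ('i2', 1) and ('i3', 2); with n=1 only one of
# them is scheduled for the next iteration of the race.
to_run = select_missing_runs([('i1', 1), ('i2', 1), ('i3', 2)], [('i1', 1)], n=1)
assert len(to_run) == 1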
Example #2
0
    def test_get_config_runs(self):
        '''
            get some config runs from runhistory
        '''
        # return max observed budget only
        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs,
                                values={'a': 1, 'b': 2})
        config2 = Configuration(cs,
                                values={'a': 1, 'b': 3})
        rh.add(config=config1, cost=10, time=20, status=StatusType.SUCCESS,
               instance_id=1, seed=1, budget=1)
        rh.add(config=config1, cost=10, time=20, status=StatusType.SUCCESS,
               instance_id=1, seed=1, budget=2)
        with self.assertRaisesRegex(ValueError, 'This should not happen!'):
            rh.add(config=config1, cost=10, time=20, status=StatusType.SUCCESS,
                   instance_id=2, seed=2, budget=1)

        rh.add(config=config2, cost=10, time=20, status=StatusType.SUCCESS,
               instance_id=1, seed=1, budget=1)

        ist = rh.get_runs_for_config(config=config1, only_max_observed_budget=True)

        self.assertEqual(len(ist), 2)
        self.assertEqual(ist[0].instance, 1)
        self.assertEqual(ist[1].instance, 2)
        self.assertEqual(ist[0].budget, 2)
        self.assertEqual(ist[1].budget, 1)

        # multiple budgets (only_max_observed_budget=False)
        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs,
                                values={'a': 1, 'b': 2})
        config2 = Configuration(cs,
                                values={'a': 1, 'b': 3})
        rh.add(config=config1, cost=5, time=10, status=StatusType.SUCCESS,
               instance_id=1, seed=1, budget=1)
        rh.add(config=config1, cost=10, time=20, status=StatusType.SUCCESS,
               instance_id=1, seed=1, budget=2)

        rh.add(config=config2, cost=5, time=10, status=StatusType.SUCCESS,
               instance_id=1, seed=1, budget=1)
        rh.add(config=config2, cost=10, time=20, status=StatusType.SUCCESS,
               instance_id=1, seed=1, budget=2)

        ist = rh.get_runs_for_config(config=config1, only_max_observed_budget=False)

        self.assertEqual(len(ist), 2)
        self.assertEqual(ist[0].instance, 1)
        self.assertEqual(ist[0].budget, 1)
        self.assertEqual(ist[1].budget, 2)
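# Hedged sketch of the only_max_observed_budget=True semantics exercised above:
# per (instance, seed) pair only the run with the highest observed budget is
# kept. Plain-tuple illustration, not RunHistory's actual implementation.
def max_budget_runs(runs):
    """runs: list of (instance, seed, budget) tuples."""
    best = {}
    for inst, seed, budget in runs:
        if (inst, seed) not in best or budget > best[(inst, seed)]:
            best[(inst, seed)] = budget
    return [(inst, seed, budget) for (inst, seed), budget in best.items()]

# (1, 1) was run with budgets 1 and 2 -> only the budget-2 entry survives.
assert max_budget_runs([(1, 1, 1), (1, 1, 2), (2, 2, 1)]) == [(1, 1, 2), (2, 2, 1)]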
Example #3
0
    def test_get_config_runs(self):
        '''
            get some config runs from runhistory
        '''

        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs,
                                values={'a': 1, 'b': 2})
        config2 = Configuration(cs,
                                values={'a': 1, 'b': 3})
        rh.add(config=config1, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=1,
               seed=1)

        rh.add(config=config2, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=1,
               seed=1)

        rh.add(config=config1, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=2,
               seed=2)

        ist = rh.get_runs_for_config(config=config1)
        self.assertEqual(len(ist), 2)
        self.assertEqual(ist[0].instance, 1)
        self.assertEqual(ist[1].instance, 2)
Example #4
0
def get_cost_dict_for_config(rh: RunHistory,
                             conf: Configuration,
                             par: int = 1,
                             cutoff: typing.Union[float, None] = None):
    """
    Aggregates loss for configuration on evaluated instances over seeds.

    Parameters
    ----------
    rh: RunHistory
        runhistory with data
    conf: Configuration
        configuration to evaluate
    par: int
        par-factor with which to multiply timeouts
    cutoff: float
        cutoff of scenario - used to penalize costs if par != 1

    Returns
    -------
    cost: dict(instance->cost)
        cost per instance (aggregated or as list per seed)
    """
    # Raises KeyError if the config is not in the runhistory
    conf_id = rh.config_ids[conf]

    # Map instances to seeds in dict
    runs = rh.get_runs_for_config(conf)
    instance_to_seeds = dict()
    for run in runs:
        inst, seed = run
        if inst in instance_to_seeds:
            instance_to_seeds[inst].append(seed)
        else:
            instance_to_seeds[inst] = [seed]

    # Get loss per instance
    instance_costs = {
        i: [rh.data[RunKey(conf_id, i, s)].cost for s in instance_to_seeds[i]]
        for i in instance_to_seeds
    }

    # Aggregate:
    instance_costs = {i: np.mean(instance_costs[i]) for i in instance_costs}

    # TODO: uncomment next line and delete all above after next SMAC dev->master
    # instance_costs = rh.get_instance_costs_for_config(conf)

    if par != 1:
        if cutoff:
            instance_costs = {
                k: v if v < cutoff else v * par
                for k, v in instance_costs.items()
            }
        else:
            raise ValueError(
                "To apply penalization of costs, a cutoff needs to be provided."
            )

    return instance_costs
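# Worked sketch of the PAR penalization applied above, with hypothetical
# instance costs: any cost at or above the cutoff is multiplied by the
# par-factor (here PAR10), everything else stays untouched.
raw_costs = {'inst_a': 2.5, 'inst_b': 10.0}   # 'inst_b' hit the cutoff
cutoff, par = 10.0, 10
penalized = {k: v if v < cutoff else v * par for k, v in raw_costs.items()}
assert penalized == {'inst_a': 2.5, 'inst_b': 100.0}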
Example #5
0
def _cost(config: Configuration, run_history: RunHistory,
          instance_seed_pairs=None):
    """Return array of all costs for the given config for further calculations.

    Parameters
    ----------
    config : Configuration
        Configuration to calculate objective for
    run_history : RunHistory
        RunHistory object from which the objective value is computed.
    instance_seed_pairs : list, optional (default=None)
        List of tuples of instance-seeds pairs. If None, the run_history is
        queried for all runs of the given configuration.

    Returns
    -------
    costs: list
        List of all costs for the given configuration
    """
    try:
        id_ = run_history.config_ids[config]
    except KeyError:  # challenger was not running so far
        return []

    if instance_seed_pairs is None:
        instance_seed_pairs = run_history.get_runs_for_config(config)

    costs = []
    for i, r in instance_seed_pairs:
        k = RunKey(id_, i, r)
        costs.append(run_history.data[k].cost)
    return costs
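# Hedged sketch of how a cost list like the one returned above is typically
# collapsed into a single value (e.g., a mean over instance-seed pairs); the
# NaN fallback for an unseen config is an assumption of this sketch.
import numpy as np

def aggregate_costs(costs):
    return float(np.mean(costs)) if costs else float('nan')

assert aggregate_costs([1.0, 3.0]) == 2.0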
Example #6
0
    def _top_k(self, configs: typing.List[Configuration],
               run_history: RunHistory, k: int) -> typing.List[Configuration]:
        """
        Selects the top 'k' configurations from the given list based on their performance.

        This retrieves the performance for each configuration from the runhistory and checks
        that the highest budget they've been evaluated on is the same for each of the configurations.

        Parameters
        ----------
        configs: typing.List[Configuration]
            list of configurations to filter from
        run_history: smac.runhistory.runhistory.RunHistory
            stores all runs we ran so far
        k: int
            number of configurations to select

        Returns
        -------
        typing.List[Configuration]
            top challenger configurations, sorted in increasing costs
        """
        # extracting costs for each given configuration
        config_costs = {}
        # use the first configuration's list of instance-seed-budget keys as the reference
        run_key = run_history.get_runs_for_config(
            configs[0], only_max_observed_budget=True)
        for c in configs:
            # ensuring that all configurations being compared are run on the same set of instance, seed & budget
            cur_run_key = run_history.get_runs_for_config(
                c, only_max_observed_budget=True)
            if cur_run_key != run_key:
                raise ValueError(
                    'Cannot compare configs that were run on different instances-seeds-budgets: %s vs %s'
                    % (run_key, cur_run_key))
            config_costs[c] = run_history.get_cost(c)

        configs_sorted = sorted(config_costs, key=config_costs.get)
        # select top configurations only
        top_configs = configs_sorted[:k]
        return top_configs
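# Minimal sketch of the top-k selection above with made-up costs: configurations
# are ranked by their aggregated cost and the k cheapest ones are kept.
config_costs = {'cfg_a': 0.7, 'cfg_b': 0.2, 'cfg_c': 0.5}
k = 2
top_k = sorted(config_costs, key=config_costs.get)[:k]
assert top_k == ['cfg_b', 'cfg_c']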
Example #7
0
    def test_merge_foreign_data(self):
        ''' test smac.utils.merge_foreign_data '''

        scenario = Scenario(self.test_scenario_dict)
        scenario_2 = Scenario(self.test_scenario_dict)
        scenario_2.feature_dict = {"inst_new": [4]}

        # init cs
        cs = ConfigurationSpace()
        cs.add_hyperparameter(UniformIntegerHyperparameter(name='a',
                                                           lower=0,
                                                           upper=100))
        cs.add_hyperparameter(UniformIntegerHyperparameter(name='b',
                                                           lower=0,
                                                           upper=100))
        # build runhistory
        rh_merge = RunHistory()
        config = Configuration(cs, values={'a': 1, 'b': 2})

        rh_merge.add(config=config, instance_id="inst_new", cost=10, time=20,
                     status=StatusType.SUCCESS,
                     seed=None,
                     additional_info=None)

        # "d" is an instance in <scenario>
        rh_merge.add(config=config, instance_id="d", cost=5, time=20,
                     status=StatusType.SUCCESS,
                     seed=None,
                     additional_info=None)

        # build empty rh
        rh_base = RunHistory()

        merge_foreign_data(scenario=scenario, runhistory=rh_base,
                           in_scenario_list=[scenario_2], in_runhistory_list=[rh_merge])

        # both runs should be in the runhistory
        # but we should not use the data to update the cost of config
        self.assertTrue(len(rh_base.data) == 2)
        self.assertTrue(np.isnan(rh_base.get_cost(config)))

        # we should not get direct access to external run data
        runs = rh_base.get_runs_for_config(config, only_max_observed_budget=True)
        self.assertTrue(len(runs) == 0)

        rh_merge.add(config=config, instance_id="inst_new_2", cost=10, time=20,
                     status=StatusType.SUCCESS,
                     seed=None,
                     additional_info=None)

        self.assertRaises(ValueError, merge_foreign_data, **{
                          "scenario": scenario, "runhistory": rh_base,
                          "in_scenario_list": [scenario_2], "in_runhistory_list": [rh_merge]})
Example #8
0
    def test_add_multiple_times(self):
        rh = RunHistory()
        cs = get_config_space()
        config = Configuration(cs, values={'a': 1, 'b': 2})

        for i in range(5):
            rh.add(config=config, cost=i + 1, time=i + 1,
                   status=StatusType.SUCCESS, instance_id=None,
                   seed=12345, additional_info=None)

        self.assertEqual(len(rh.data), 1)
        self.assertEqual(len(rh.get_runs_for_config(config, only_max_observed_budget=True)), 1)
        self.assertEqual(len(rh._configid_to_inst_seed_budget[1]), 1)
        self.assertEqual(list(rh.data.values())[0].cost, 1)
Example #9
0
    def _process_inc_run(
        self,
        incumbent: Configuration,
        run_history: RunHistory,
        log_traj: bool = True,
    ) -> None:
        """Method to process the results of a challenger that races
        an incumbent

        Parameters
        ----------
        incumbent: Configuration
            Either challenger or incumbent
        run_history : RunHistory
            stores all runs we ran so far
        log_traj: bool
            Whether to log changes of incumbents in trajectory

        """
        # output estimated performance of incumbent
        inc_runs = run_history.get_runs_for_config(
            incumbent, only_max_observed_budget=True)
        inc_perf = run_history.get_cost(incumbent)
        format_value = format_array(inc_perf)
        self.logger.info(
            f"Updated estimated cost of incumbent on {len(inc_runs)} runs: {format_value}"
        )

        # if running first configuration, go to next stage after 1st run
        if self.stage in [
                IntensifierStage.RUN_FIRST_CONFIG,
                IntensifierStage.PROCESS_FIRST_CONFIG_RUN
        ]:
            self.stage = IntensifierStage.RUN_INCUMBENT
            self._next_iteration()
        else:
            # Termination condition; after each run, this checks
            # whether further runs are necessary due to minR
            if len(inc_runs) >= self.minR or len(inc_runs) >= self.maxR:
                self.stage = IntensifierStage.RUN_CHALLENGER
            else:
                self.stage = IntensifierStage.RUN_INCUMBENT

        self._compare_configs(incumbent=incumbent,
                              challenger=incumbent,
                              run_history=run_history,
                              log_traj=log_traj)
Example #10
0
    def _adapt_cutoff(self,
                      challenger: Configuration,
                      run_history: RunHistory,
                      inc_sum_cost: float) -> float:
        """Adaptive capping:
        Compute cutoff based on time so far used for incumbent
        and reduce cutoff for next run of challenger accordingly

        !Only applicable if self.run_obj_time

        !runs on incumbent should be superset of the runs performed for the
         challenger

        Parameters
        ----------
        challenger : Configuration
            Configuration which challenges incumbent
        run_history : smac.runhistory.runhistory.RunHistory
            Stores all runs we ran so far
        inc_sum_cost: float
            Sum of runtimes of all incumbent runs

        Returns
        -------
        cutoff: float
            Adapted cutoff
        """

        if not self.run_obj_time:
            raise ValueError('This method only works when the run objective is runtime')

        curr_cutoff = self.cutoff if self.cutoff is not None else np.inf

        # cost used by challenger for going over all its runs
        # should be subset of runs of incumbent (not checked for efficiency
        # reasons)
        chall_inst_seeds = run_history.get_runs_for_config(challenger, only_max_observed_budget=True)
        chal_sum_cost = run_history.sum_cost(
            config=challenger,
            instance_seed_budget_keys=chall_inst_seeds,
        )
        cutoff = min(curr_cutoff,
                     inc_sum_cost * self.adaptive_capping_slackfactor - chal_sum_cost
                     )
        return cutoff
Example #11
0
    def reduce_runhistory(self, rh: RunHistory, max_configs: int, keep=None):
        """
        Reduce configs to desired number, by default just drop the configs with the fewest runs.

        Parameters
        ----------
        rh: RunHistory
            runhistory that is to be reduced
        max_configs: int
            if > 0, reduce the runhistory to at most max_configs configurations
        keep: List[Configuration]
            list of configs that should be kept for sure (e.g. default, incumbents)

        Returns
        -------
        rh: RunHistory
            reduced runhistory
        """
        configs = rh.get_all_configs()
        if max_configs <= 0 or max_configs > len(configs):  # keep all
            return rh

        runs = [(c,
                 len(rh.get_runs_for_config(c,
                                            only_max_observed_budget=False)))
                for c in configs]
        if not keep:
            keep = []
        runs = sorted(runs, key=lambda x: x[1])[-max_configs:]
        keep = [r[0] for r in runs] + keep
        self.logger.info(
            "Reducing number of configs from %d to %d, dropping from the fewest evaluations",
            len(configs), len(keep))

        new_rh = RunHistory()
        for k, v in list(rh.data.items()):
            c = rh.ids_config[k.config_id]
            if c in keep:
                new_rh.add(config=rh.ids_config[k.config_id],
                           cost=v.cost,
                           time=v.time,
                           status=v.status,
                           instance_id=k.instance_id,
                           seed=k.seed)
        return new_rh
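# Standalone sketch of the reduction rule above with hypothetical run counts:
# the configs with the most runs are kept (plus anything protected via `keep`).
run_counts = [('cfg_a', 5), ('cfg_b', 1), ('cfg_c', 3)]
max_configs = 2
kept = [c for c, _ in sorted(run_counts, key=lambda x: x[1])[-max_configs:]]
assert kept == ['cfg_c', 'cfg_a']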
Example #12
0
    def _adapt_cutoff(self, challenger: Configuration,
                      incumbent: Configuration,
                      run_history: RunHistory,
                      inc_sum_cost: float):
        """Adaptive capping:
        Compute cutoff based on time so far used for incumbent
        and reduce cutoff for next run of challenger accordingly

        !Only applicable if self.run_obj_time

        !runs on incumbent should be superset of the runs performed for the
         challenger

        Parameters
        ----------
        challenger : Configuration
            Configuration which challenges incumbent
        incumbent : Configuration
            Best configuration so far
        run_history : RunHistory
            Stores all runs we ran so far
        inc_sum_cost: float
            Sum of runtimes of all incumbent runs

        Returns
        -------
        cutoff: float
            Adapted cutoff
        """

        if not self.run_obj_time:
            return self.cutoff

        # cost used by challenger for going over all its runs
        # should be subset of runs of incumbent (not checked for efficiency
        # reasons)
        chall_inst_seeds = run_history.get_runs_for_config(challenger)
        chal_sum_cost = sum_cost(config=challenger,
                                 instance_seed_pairs=chall_inst_seeds,
                                 run_history=run_history)
        cutoff = min(self.cutoff,
                     inc_sum_cost * self.Adaptive_Capping_Slackfactor -
                     chal_sum_cost
                     )
        return cutoff
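# Worked sketch of the adaptive-capping formula used in both _adapt_cutoff
# variants above (hypothetical numbers): the challenger gets the slack-inflated
# incumbent runtime minus what it already spent, capped at the scenario cutoff.
scenario_cutoff = 20.0
inc_sum_cost = 15.0     # incumbent runtime on the shared instance-seed pairs
chal_sum_cost = 6.0     # runtime the challenger has used so far
slack = 1.2
cutoff = min(scenario_cutoff, inc_sum_cost * slack - chal_sum_cost)
assert cutoff == 12.0   # same arithmetic as in test_adaptive_capping further below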
Example #13
0
    def _get_inc_available_inst(
        self,
        incumbent: Configuration,
        run_history: RunHistory,
        log_traj: bool = True,
    ) -> typing.List[str]:
        """
        Implementation of line 4 of Intensification

        This method queries the incumbent runs in the run history
        and returns the pending instances if any are available

        Parameters
        ----------
        incumbent: Configuration
            Either challenger or incumbent
        run_history : RunHistory
            stores all runs we ran so far
        log_traj: bool
            Whether to log changes of incumbents in trajectory

        Returns
        -------
        typing.List[str]
            List of instances still pending for the incumbent
        """
        # Line 4
        # find all instances that have the most runs on the inc
        inc_runs = run_history.get_runs_for_config(
            incumbent,
            only_max_observed_budget=True
        )
        inc_inst = [s.instance for s in inc_runs]
        inc_inst = list(Counter(inc_inst).items())
        inc_inst.sort(key=lambda x: x[1], reverse=True)
        try:
            max_runs = inc_inst[0][1]
        except IndexError:
            self.logger.debug("No run for incumbent found")
            max_runs = 0
        inc_inst = [x[0] for x in inc_inst if x[1] == max_runs]

        available_insts = list(sorted(set(self.instances) - set(inc_inst)))

        # if all instances were used n times, we can pick an instance
        # from the complete set again
        if not self.deterministic and not available_insts:
            available_insts = self.instances
        return available_insts
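# Minimal sketch of the instance-selection rule above: instances on which the
# incumbent already has the maximum number of runs are excluded, the rest are
# returned as available.
from collections import Counter

inc_inst = ['i1', 'i1', 'i2']        # incumbent ran i1 twice, i2 once
all_instances = ['i1', 'i2', 'i3']
counts = Counter(inc_inst).most_common()
max_runs = counts[0][1] if counts else 0
saturated = {inst for inst, n in counts if n == max_runs}
assert sorted(set(all_instances) - saturated) == ['i2', 'i3']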
Example #14
0
    def _get_runs_per_config_quantiled(self, rh, conf_list, quantiles):
        """Returns a list of lists, each sublist representing the current state
        at that timestep (quantile). The current state means a list of times
        each config was evaluated at that timestep.

        Parameters
        ----------
        rh: RunHistory
            rh to evaluate
        conf_list: list
            list of all Configuration objects that appeared in runhistory
        quantiles: int
            number of fractions to split rh into

        Returns
        -------
        runs_per_quantile: np.array
            numpy array of runs per configuration per quantile
        """
        runs_total = len(rh.data)
        # Create LINEAR ranges. TODO do we want log? -> this line
        ranges = [int(r) for r in np.linspace(0, runs_total, quantiles + 1)]
        self.logger.debug(
            "Creating %d quantiles with a step of %.2f and a total "
            "runs of %d", quantiles, runs_total / quantiles, runs_total)
        self.logger.debug("Ranges: %s", str(ranges))

        # Iterate over the runhistory's entries in ranges and create each
        # sublist from a "snapshot"-runhistory
        r_p_q_p_c = []  # runs per quantile per config
        as_list = list(rh.data.items())
        tmp_rh = RunHistory(average_cost)
        for i, j in zip(ranges[:-1], ranges[1:]):
            for idx in range(i, j):
                k, v = as_list[idx]
                tmp_rh.add(config=rh.ids_config[k.config_id],
                           cost=v.cost,
                           time=v.time,
                           status=v.status,
                           instance_id=k.instance_id,
                           seed=k.seed)
            r_p_q_p_c.append(
                [len(tmp_rh.get_runs_for_config(c)) for c in conf_list])
        return r_p_q_p_c
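# Sketch of the linear quantile boundaries computed above, assuming 10 runs
# split into 4 quantiles: each snapshot runhistory grows by roughly
# runs_total / quantiles entries.
import numpy as np

runs_total, quantiles = 10, 4
ranges = [int(r) for r in np.linspace(0, runs_total, quantiles + 1)]
assert ranges == [0, 2, 5, 7, 10]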
Example #15
0
    def test_add_multiple_times(self):
        rh = RunHistory()
        cs = get_config_space()
        config = Configuration(cs, values={"a": 1, "b": 2})

        for i in range(5):
            rh.add(
                config=config,
                cost=[i + 1, i + 2],
                time=i + 1,
                status=StatusType.SUCCESS,
                instance_id=None,
                seed=12345,
                additional_info=None,
            )

        self.assertEqual(len(rh.data), 1)
        self.assertEqual(
            len(rh.get_runs_for_config(config, only_max_observed_budget=True)),
            1)
        self.assertEqual(len(rh._configid_to_inst_seed_budget[1]), 1)

        # We expect to get 1.0 and 2.0 because runhistory does not overwrite by default
        self.assertEqual(list(rh.data.values())[0].cost, [1.0, 2.0])
Example #16
0
class TestIntensify(unittest.TestCase):
    def setUp(self):
        unittest.TestCase.setUp(self)

        self.rh = RunHistory(aggregate_func=average_cost)
        self.cs = get_config_space()
        self.config1 = Configuration(self.cs, values={'a': 0, 'b': 100})
        self.config2 = Configuration(self.cs, values={'a': 100, 'b': 0})
        self.config3 = Configuration(self.cs, values={'a': 100, 'b': 100})

        self.scen = Scenario({
            "cutoff_time": 2,
            'cs': self.cs,
            "run_obj": 'runtime',
            "output_dir": ''
        })
        self.stats = Stats(scenario=self.scen)
        self.stats.start_timing()

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

    def test_compare_configs_no_joint_set(self):
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=None,
                                  instances=[1])

        for i in range(2):
            self.rh.add(config=self.config1,
                        cost=2,
                        time=2,
                        status=StatusType.SUCCESS,
                        instance_id=1,
                        seed=i,
                        additional_info=None)

        for i in range(2, 5):
            self.rh.add(config=self.config2,
                        cost=1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=1,
                        seed=i,
                        additional_info=None)

        # The sets for the incumbent are completely disjoint.
        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)
        self.assertIsNone(conf)

        # The incumbent has still one instance-seed pair left on which the
        # challenger was not run yet.
        self.rh.add(config=self.config2,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=1,
                    additional_info=None)
        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)
        self.assertIsNone(conf)

    def test_compare_configs_chall(self):
        '''
            challenger is better
        '''
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=None,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config2,
                    cost=0,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)

        # challenger has enough runs and is better
        self.assertEqual(conf, self.config2, "conf: %s" % (conf))

    def test_compare_configs_inc(self):
        '''
            incumbent is better
        '''
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=None,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config2,
                    cost=2,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)

        # challenger worse than inc
        self.assertEqual(conf, self.config1, "conf: %s" % (conf))

    def test_compare_configs_unknow(self):
        '''
            challenger is better but has fewer runs;
            -> no decision (None)
        '''
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=None,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=2,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=2,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh,
                                            aggregate_func=average_cost)

        # challenger has fewer runs than the incumbent -> no decision
        self.assertIsNone(conf, "conf: %s" % (conf))

    def test_race_challenger(self):
        '''
           test _race_challenger without adaptive capping
        '''
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        self.assertEqual(inc, self.config2)

    def test_race_challenger_2(self):
        '''
           test _race_challenger with adaptive capping
        '''
        def target(x):
            time.sleep(1.5)
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats, run_obj="runtime")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=.001,
                    time=0.001,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=12345,
                    additional_info=None)

        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        self.assertEqual(inc, self.config1)

    def test_race_challenger_3(self):
        '''
           test _race_challenger with adaptive capping on a previously capped configuration  
        '''
        def target(config: Configuration, seed: int, instance: str):
            if instance == 1:
                time.sleep(2.1)
            else:
                time.sleep(0.6)
            return (config['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="runtime",
                                par_factor=1)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  cutoff=2,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=0.5,
                    time=.5,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=12345,
                    additional_info=None)

        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)
        self.assertEqual(inc, self.config1)

        # further run for incumbent
        self.rh.add(config=self.config1,
                    cost=2,
                    time=2,
                    status=StatusType.TIMEOUT,
                    instance_id=2,
                    seed=12345,
                    additional_info=None)

        # give config2 a second chance
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        # the incumbent should still be config1 because
        # config2 should get a full timeout on instance 1,
        # such that c(config1) = 1.25 and c(config2) is close to 1.3
        self.assertEqual(inc, self.config1)
        # the capped run should not be counted in runs_per_config
        self.assertAlmostEqual(self.rh.runs_per_config[2], 2)

    def test_race_challenger_large(self):
        '''
           test _race_challenger using solution_quality
        '''
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(10)),
                                  deterministic=True)

        for i in range(10):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=12345,
                        additional_info=None)

        # tie on the first instance, then the challenger should always win
        # and be returned as the incumbent
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1,
                         self.rh.get_cost(self.config2))

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2)
        self.assertEqual(len(runs), 10)

    def test_race_challenger_large_blocked_seed(self):
        '''
           test whether seeds are blocked (reused) for challenger runs in _race_challenger
        '''
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(10)),
                                  deterministic=False)

        for i in range(10):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)

        # tie on the first instance, then the challenger should always win
        # and be returned as the incumbent
        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           aggregate_func=average_cost)

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1,
                         self.rh.get_cost(self.config2))

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2)
        self.assertEqual(len(runs), 10)

        seeds = sorted([r.seed for r in runs])
        self.assertEqual(seeds, list(range(10)), seeds)

    def test_add_inc_run_det(self):
        '''
            test _add_inc_run()
        '''
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="solution_quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        # since we assume deterministic=True,
        # the second call should not add any more runs
        # given only one instance
        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

    def test_add_inc_run_nondet(self):
        '''
            test _add_inc_run()
        '''
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="solution_quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1, 2],
                                  deterministic=False)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 2, self.rh.data)
        runs = self.rh.get_runs_for_config(config=self.config1)
        # exactly one run on each instance
        self.assertIn(1, [runs[0].instance, runs[1].instance])
        self.assertIn(2, [runs[0].instance, runs[1].instance])

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 3, self.rh.data)

    def test_adaptive_capping(self):
        '''
            test _adapt_cutoff()
        '''
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(5)),
                                  deterministic=False)

        for i in range(5):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=i + 1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)
        for i in range(3):
            self.rh.add(config=self.config2,
                        cost=i + 1,
                        time=i + 1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)

        inst_seed_pairs = self.rh.get_runs_for_config(self.config1)
        # cost used by incumbent for going over all runs in inst_seed_pairs
        inc_sum_cost = sum_cost(config=self.config1,
                                instance_seed_pairs=inst_seed_pairs,
                                run_history=self.rh)

        cutoff = intensifier._adapt_cutoff(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           inc_sum_cost=inc_sum_cost)
        # 15*1.2 - 6
        self.assertEqual(cutoff, 12)

        intensifier.cutoff = 5

        cutoff = intensifier._adapt_cutoff(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh,
                                           inc_sum_cost=inc_sum_cost)
        # scenario cutoff
        self.assertEqual(cutoff, 5)
Example #17
0
    def _compare_configs(self, incumbent: Configuration,
                         challenger: Configuration, run_history: RunHistory,
                         aggregate_func: typing.Callable):
        '''
            compare two configurations w.r.t. the runhistory
            and return the one that performs better (or None if the decision is not safe)

            Decision strategy to return x as being better than y:
                1. x has at least as many runs as y
                2. x performs better than y on the intersection of runs on x and y

            Implicit assumption:
                challenger was evaluated on the same instance-seed pairs as incumbent

            Parameters
            ----------
            incumbent: Configuration
                current incumbent
            challenger: Configuration
                challenger configuration
            run_history: RunHistory
                stores all runs we ran so far
            aggregate_func: typing.Callable
                aggregate performance across instances

            Returns
            -------
            None or better of the two configurations x,y
        '''

        inc_runs = run_history.get_runs_for_config(incumbent)
        chall_runs = run_history.get_runs_for_config(challenger)
        to_compare_runs = set(inc_runs).intersection(chall_runs)

        # performance on challenger runs
        chal_perf = aggregate_func(challenger, run_history, to_compare_runs)
        inc_perf = aggregate_func(incumbent, run_history, to_compare_runs)

        # Line 15
        if chal_perf > inc_perf and len(chall_runs) >= self.minR:
            # Incumbent beats challenger
            self.logger.debug(
                "Incumbent (%.4f) is better than challenger (%.4f) on %d runs."
                % (inc_perf, chal_perf, len(chall_runs)))
            return incumbent

        # Line 16
        if not set(inc_runs) - set(chall_runs):
            # Challenger is as good as incumbent
            # and has at least the same runs as inc
            # -> change incumbent

            n_samples = len(chall_runs)
            self.logger.info(
                "Challenger (%.4f) is better than incumbent (%.4f) on %d runs."
                % (chal_perf, inc_perf, n_samples))
            # Show changes in the configuration
            params = sorted([(param, incumbent[param], challenger[param])
                             for param in challenger.keys()])
            self.logger.info("Changes in incumbent:")
            for param in params:
                if param[1] != param[2]:
                    self.logger.info("  %s : %r -> %r" % (param))
                else:
                    self.logger.debug("  %s remains unchanged: %r" %
                                      (param[0], param[1]))
            self.stats.inc_changed += 1
            self.traj_logger.add_entry(train_perf=chal_perf,
                                       incumbent_id=self.stats.inc_changed,
                                       incumbent=challenger)
            return challenger

        return None  # undecided
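# Hedged sketch of the decision rule above with made-up numbers (lower cost is
# better): the incumbent wins if the challenger is worse on the shared runs and
# already has at least minR runs; the challenger wins if it is not worse and
# covers all incumbent runs; otherwise the race stays undecided (None).
def compare(inc_perf, chal_perf, inc_runs, chall_runs, min_r):
    if chal_perf > inc_perf and len(chall_runs) >= min_r:
        return 'incumbent'
    if not set(inc_runs) - set(chall_runs):
        return 'challenger'
    return None

assert compare(1.0, 2.0, {('i1', 0)}, {('i1', 0)}, min_r=1) == 'incumbent'
assert compare(2.0, 1.0, {('i1', 0)}, {('i1', 0)}, min_r=1) == 'challenger'
assert compare(2.0, 1.0, {('i1', 0), ('i2', 0)}, {('i1', 0)}, min_r=1) is None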
Example #18
0
    def _race_challenger(self, challenger: Configuration,
                         incumbent: Configuration, run_history: RunHistory,
                         aggregate_func: typing.Callable):
        '''
            aggressively race challenger against incumbent

            Parameters
            ----------
            challenger : Configuration
                configuration which challenges incumbent
            incumbent : Configuration
                best configuration so far
            run_history : RunHistory
                stores all runs we ran so far
            aggregate_func: typing.Callable
                aggregate performance across instances

            Returns
            -------
            new_incumbent: Configuration
                either challenger or incumbent
        '''
        # at least one run of challenger
        # to increase chall_indx counter
        first_run = False

        # Line 8
        N = max(1, self.minR)

        inc_inst_seeds = set(run_history.get_runs_for_config(incumbent))
        # Line 9
        while True:
            chall_inst_seeds = set(run_history.get_runs_for_config(challenger))

            # Line 10
            missing_runs = list(inc_inst_seeds - chall_inst_seeds)

            # Line 11
            self.rs.shuffle(missing_runs)
            to_run = missing_runs[:min(N, len(missing_runs))]
            # Line 13 (Line 12 comes below...)
            missing_runs = missing_runs[min(N, len(missing_runs)):]

            # for adaptive capping:
            # computed here (rather than on demand) for efficiency
            inst_seed_pairs = list(inc_inst_seeds - set(missing_runs))
            # cost used by incumbent for going over all runs in inst_seed_pairs
            inc_sum_cost = sum_cost(config=incumbent,
                                    instance_seed_pairs=inst_seed_pairs,
                                    run_history=run_history)

            # Line 12
            # Run challenger on all <instance, seed> pairs in to_run
            for instance, seed in to_run:

                cutoff = self._adapt_cutoff(challenger=challenger,
                                            incumbent=incumbent,
                                            run_history=run_history,
                                            inc_sum_cost=inc_sum_cost)
                if cutoff is not None and cutoff <= 0:  # no time to validate challenger
                    self.logger.debug(
                        "Stop challenger itensification due to adaptive capping."
                    )
                    # challenger performs worse than incumbent
                    return incumbent

                if not first_run:
                    first_run = True
                    self._chall_indx += 1

                self.logger.debug("Add run of challenger")
                try:
                    status, cost, dur, res = self.tae_runner.start(
                        config=challenger,
                        instance=instance,
                        seed=seed,
                        cutoff=cutoff,
                        instance_specific=self.instance_specifics.get(
                            instance, "0"),
                        capped=(self.cutoff is not None)
                        and (cutoff < self.cutoff))
                    self._num_run += 1
                except CappedRunException:
                    return incumbent

            new_incumbent = self._compare_configs(
                incumbent=incumbent,
                challenger=challenger,
                run_history=run_history,
                aggregate_func=aggregate_func)
            if new_incumbent == incumbent:
                break
            elif new_incumbent == challenger:
                incumbent = challenger
                break
            else:  # Line 17
                # challenger is not worse, continue
                N = 2 * N

        return incumbent
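# Sketch of the doubling schedule in the race loop above: while the comparison
# stays undecided, the number of additional challenger runs per iteration
# doubles (Line 17 of Procedure 2 in Hutter et al., 2011).
min_r = 1
N, schedule = max(1, min_r), []
for _ in range(4):       # four undecided iterations, purely illustrative
    schedule.append(N)
    N = 2 * N
assert schedule == [1, 2, 4, 8]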
Example #19
0
    def _add_inc_run(self, incumbent: Configuration, run_history: RunHistory):
        '''
            add a new run for the incumbent
            Side effect: adds runs to <run_history>

            Parameters
            ----------
            incumbent : Configuration
                best configuration so far
            run_history : RunHistory
                stores all runs we ran so far
        '''

        inc_runs = run_history.get_runs_for_config(incumbent)

        # Line 3
        # First evaluate incumbent on a new instance
        if len(inc_runs) < self.maxR:
            while True:
                # Line 4
                # find all instances that have the most runs on the inc
                inc_runs = run_history.get_runs_for_config(incumbent)
                inc_inst = [s.instance for s in inc_runs]
                inc_inst = list(Counter(inc_inst).items())
                inc_inst.sort(key=lambda x: x[1], reverse=True)
                try:
                    max_runs = inc_inst[0][1]
                except IndexError:
                    self.logger.debug("No run for incumbent found")
                    max_runs = 0
                inc_inst = set([x[0] for x in inc_inst if x[1] == max_runs])

                available_insts = (self.instances - inc_inst)

                # if all instances were used n times, we can pick an instance
                # from the complete set again
                if not self.deterministic and not available_insts:
                    available_insts = self.instances

                # Line 6 (Line 5 is further down...)
                if self.deterministic:
                    next_seed = 0
                else:
                    next_seed = self.rs.randint(low=0, high=MAXINT, size=1)[0]

                if available_insts:
                    # Line 5 (here for easier code)
                    next_instance = self.rs.choice(list(available_insts))
                    # Line 7
                    self.logger.debug("Add run of incumbent")
                    status, cost, dur, res = self.tae_runner.start(
                        config=incumbent,
                        instance=next_instance,
                        seed=next_seed,
                        cutoff=self.cutoff,
                        instance_specific=self.instance_specifics.get(
                            next_instance, "0"))

                    self._num_run += 1
                else:
                    self.logger.debug(
                        "No further instance-seed pairs for incumbent available."
                    )
                    break

                inc_runs = run_history.get_runs_for_config(incumbent)
                # Termination condition; after exactly one run, this checks
                # whether further runs are necessary due to minR
                if len(inc_runs) >= self.minR or len(inc_runs) >= self.maxR:
                    break
Example #20
0
    def intensify(self,
                  challengers: typing.List[Configuration],
                  incumbent: Configuration,
                  run_history: RunHistory,
                  aggregate_func: typing.Callable,
                  time_bound: int = MAXINT):
        '''
            running intensification to determine the incumbent configuration
            Side effect: adds runs to run_history

            Implementation of Procedure 2 in Hutter et al. (2011).

            Parameters
            ----------

            challengers : typing.List[Configuration]
                promising configurations
            incumbent : Configuration
                best configuration so far
            run_history : RunHistory
                stores all runs we ran so far
            aggregate_func: typing.Callable
                aggregate performance across instances
            time_bound : int, optional (default=2 ** 31 - 1)
                time in [sec] available to perform intensify

            Returns
            -------
            incumbent: Configuration
                current (maybe new) incumbent configuration
            inc_perf: float
                empirical performance of incumbent configuration
        '''

        self.start_time = time.time()

        if time_bound < 0.01:
            raise ValueError("time_bound must be >= 0.01")

        self._num_run = 0
        self._chall_indx = 0

        # Line 1 + 2
        for challenger in challengers:
            if challenger == incumbent:
                self.logger.warning(
                    "Challenger was the same as the current incumbent; Skipping challenger"
                )
                continue

            self.logger.debug("Intensify on %s", challenger)
            if hasattr(challenger, 'origin'):
                self.logger.debug("Configuration origin: %s",
                                  challenger.origin)

            try:
                # Lines 3-7
                self._add_inc_run(incumbent=incumbent, run_history=run_history)

                # Lines 8-17
                incumbent = self._race_challenger(
                    challenger=challenger,
                    incumbent=incumbent,
                    run_history=run_history,
                    aggregate_func=aggregate_func)
            except BudgetExhaustedException:
                # We return incumbent, SMBO stops due to its own budget checks
                inc_perf = run_history.get_cost(incumbent)
                self.logger.debug("Budget exhausted; Return incumbent")
                return incumbent, inc_perf

            if self._chall_indx > 1 and self._num_run > self.run_limit:
                self.logger.debug("Maximum #runs for intensification reached")
                break
            elif (self._chall_indx > 1
                  and time.time() - self.start_time - time_bound >= 0):
                self.logger.debug("Timelimit for intensification reached ("
                                  "used: %f sec, available: %f sec)" %
                                  (time.time() - self.start_time, time_bound))
                break

        # output estimated performance of incumbent
        inc_runs = run_history.get_runs_for_config(incumbent)
        inc_perf = aggregate_func(incumbent, run_history, inc_runs)
        self.logger.info(
            "Updated estimated performance of incumbent on %d runs: %.4f" %
            (len(inc_runs), inc_perf))

        self.stats.update_average_configs_per_intensify(
            n_configs=self._chall_indx)

        return incumbent, inc_perf
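# --------------------------------------------------------------------------
# Supplementary sketch (not part of the source above): a simplified,
# standalone view of the loop-termination logic used by intensify() -- after
# at least two challengers, stop once either the run limit or the time bound
# is exceeded. The numbers and challenger names are made up for illustration.
import time

run_limit = 5
time_bound = 2.0
start_time = time.time()

num_run, chall_indx = 0, 0
for challenger in ["c1", "c2", "c3", "c4"]:
    chall_indx += 1
    num_run += 2                       # pretend each challenger costs two runs

    if chall_indx > 1 and num_run > run_limit:
        print("Maximum #runs for intensification reached")
        break
    elif chall_indx > 1 and time.time() - start_time - time_bound >= 0:
        print("Timelimit for intensification reached")
        break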
Example #21
0
    def _get_runs_per_config_quantiled(self, rh, conf_list, quantiles):
        """Returns a list of lists, each sublist representing the current state
        at that timestep (quantile). The current state means a list of times
        each config was evaluated at that timestep.

        Parameters
        ----------
        rh: RunHistory
            rh to be split up
        conf_list: list
            list of all Configuration objects that appear in runhistory
        quantiles: int
            number of fractions to split rh into

        Returns
        -------
        labels: List[str]
            labels for timeslider (i.e. wallclock-times)
        runs_per_quantile: np.array
            numpy array of runs per configuration per quantile
        """
        runs_total = len(rh.data)
        # Iterate over the runhistory's entries in ranges, creating each
        # sublist from a "snapshot"-runhistory
        labels, last_time_seen = [], -1  # labels are the wallclock times at the splitting points
        r_p_q_p_c = []  # runs per quantile per config
        as_list = list(rh.data.items())
        scale = np.geomspace if self.timeslider_log else np.linspace

        # Trying to work with timestamps if they are available
        timestamps = None
        try:
            as_list = sorted(as_list, key=lambda x: x[1].additional_info['timestamps']['finished'])
            timestamps = [x[1].additional_info['timestamps']['finished'] for x in as_list]
            time_ranges = scale(timestamps[0], timestamps[-1], num=quantiles+1, endpoint=True)
            ranges = []
            idx = 0
            for time_idx, time in enumerate(time_ranges):
                while len(timestamps) - 1 > idx and (timestamps[idx] < time or idx <= time_idx):
                    idx += 1
                ranges.append(idx)
        except (KeyError, TypeError) as err:
            self.logger.debug(err)
            self.logger.debug("Failed to sort by timestamps... only a reason to worry if this is BOHB-analysis")
            ranges = [int(x) for x in scale(1, runs_total, num=quantiles+1)]
        # Fix possible wrong values
        ranges[0] = 0
        ranges[-1] = len(as_list)

        self.logger.debug("Creating %d quantiles with a total number of runs of %d", quantiles, runs_total)
        self.logger.debug("Ranges: %s", str(ranges))

        for r in range(1, len(ranges)):
            if ranges[r] <= ranges[r-1]:
                if ranges[r-1] + 1 >= len(as_list):
                    raise RuntimeError("There was a problem with the quantiles of the configuration footprint. "
                                       "Please report this Error on \"https://github.com/automl/CAVE/issues\" and provide the debug.txt-file.")
                ranges[r] = ranges[r-1] + 1
                self.logger.debug("Fixed ranges to: %s", str(ranges))

        # Sanity check
        if not ranges[0] == 0 or not ranges[-1] == len(as_list) or not len(ranges) == quantiles + 1:
            raise RuntimeError("Sanity check on range-creation in configurator footprint went wrong. "
                               "Please report this Error on \"https://github.com/automl/CAVE/issues\" and provide the debug.txt-file.")

        tmp_rh = RunHistory(average_cost)
        for i, j in zip(ranges[:-1], ranges[1:]):
            for idx in range(i, j):
                k, v = as_list[idx]
                tmp_rh.add(config=rh.ids_config[k.config_id],
                           cost=v.cost, time=v.time, status=v.status,
                           instance_id=k.instance_id, seed=k.seed,
                           additional_info=v.additional_info)
            if timestamps:
                labels.append("{0:.2f}".format(timestamps[j - 1]))
            r_p_q_p_c.append([len(tmp_rh.get_runs_for_config(c)) for c in conf_list])
        self.logger.debug("Labels: " + str(labels))
        return labels, r_p_q_p_c
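# --------------------------------------------------------------------------
# Supplementary sketch (not part of the source above): how the fallback
# quantile ranges are built when no timestamps are available -- run indices
# are split into cumulative ranges on a linear (or log) scale.
import numpy as np

runs_total = 10
quantiles = 4
timeslider_log = False

scale = np.geomspace if timeslider_log else np.linspace
ranges = [int(x) for x in scale(1, runs_total, num=quantiles + 1)]
ranges[0] = 0                 # same fix-up as in the method above
ranges[-1] = runs_total
print(ranges)                 # [0, 3, 5, 7, 10]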
Example #22
0
    def get_next_run(
        self,
        challengers: typing.Optional[typing.List[Configuration]],
        incumbent: Configuration,
        chooser: typing.Optional[EPMChooser],
        run_history: RunHistory,
        repeat_configs: bool = True,
        num_workers: int = 1,
    ) -> typing.Tuple[RunInfoIntent, RunInfo]:
        """
        This procedure is in charge of generating a RunInfo object to comply
        with line 7 (in case stage == RUN_INCUMBENT) or line 12
        (in case stage == RUN_CHALLENGER).

        A RunInfo object encapsulates the necessary information for a worker
        to execute the job; nevertheless, a challenger is not always available.
        This can happen because no more configurations are available or because
        the new configuration to try was already executed.

        To circumvent this, an intent is also returned:

        - (intent=RUN) Run the RunInfo object (normal execution)
        - (intent=SKIP) Skip this iteration. No challenger is available, in particular
            because the challenger is the same as the incumbent

        Parameters
        ----------
        challengers : typing.List[Configuration]
            promising configurations
        incumbent: Configuration
            incumbent configuration
        chooser : smac.optimizer.epm_configuration_chooser.EPMChooser
            optimizer that generates next configurations to use for racing
        run_history : RunHistory
            stores all runs we ran so far
        repeat_configs : bool
            if False, an evaluated configuration will not be generated again
        num_workers: int
            the maximum number of workers available
            at a given time.

        Returns
        -------
        intent: RunInfoIntent
            What the SMBO object should do with the run_info.
        run_info: RunInfo
            An object that encapsulates necessary information for a config run
        """
        if num_workers > 1:
            raise ValueError(
                "Intensifier does not support more than 1 worker, yet "
                "the argument num_workers to get_next_run is {}".format(
                    num_workers))

        # If this function is called, it means the iteration is
        # not complete (we can be starting a new iteration, or re-running a
        # challenger due to line 17). Whether an iteration is complete is
        # evaluated via _process_results
        self.iteration_done = False

        # In case a crash happens, and FirstRunCrashedException prevents a
        # failure, revert back to running the incumbent
        # Challenger case is by construction ok, as there is no special
        # stage for its processing
        if self.stage == IntensifierStage.PROCESS_FIRST_CONFIG_RUN:
            self.stage = IntensifierStage.RUN_FIRST_CONFIG
        elif self.stage == IntensifierStage.PROCESS_INCUMBENT_RUN:
            self.stage = IntensifierStage.RUN_INCUMBENT

        # if first ever run, then assume current challenger to be the incumbent
        # Because this is the first ever run, we need to sample a new challenger
        # This new challenger is also assumed to be the incumbent
        if self.stage == IntensifierStage.RUN_FIRST_CONFIG:
            if incumbent is None:
                self.logger.info("First run, no incumbent provided;"
                                 " challenger is assumed to be the incumbent")
                challenger, self.new_challenger = self.get_next_challenger(
                    challengers=challengers,
                    chooser=chooser,
                )
                incumbent = challenger
            else:
                inc_runs = run_history.get_runs_for_config(
                    incumbent, only_max_observed_budget=True)
                if len(inc_runs) > 0:
                    self.logger.debug("Skipping RUN_FIRST_CONFIG stage since "
                                      "incumbent has already been ran")
                    self.stage = IntensifierStage.RUN_INCUMBENT

        # LINES 3-7
        if self.stage in [
                IntensifierStage.RUN_FIRST_CONFIG,
                IntensifierStage.RUN_INCUMBENT
        ]:

            # Line 3
            # A modified version that not only checks for maxR
            # but also makes sure that there are runnable instances,
            # that is, that the instances have not been exhausted
            inc_runs = run_history.get_runs_for_config(
                incumbent, only_max_observed_budget=True)

            # Line 4
            available_insts = self._get_inc_available_inst(
                incumbent, run_history)
            if available_insts and len(inc_runs) < self.maxR:
                # Lines 5-6-7
                instance, seed, cutoff = self._get_next_inc_run(
                    available_insts)

                instance_specific = "0"
                if instance is not None:
                    instance_specific = self.instance_specifics.get(
                        instance, "0")

                return RunInfoIntent.RUN, RunInfo(
                    config=incumbent,
                    instance=instance,
                    instance_specific=instance_specific,
                    seed=seed,
                    cutoff=cutoff,
                    capped=False,
                    budget=0.0,
                )
            else:
                # This point marks the transition from lines 3-7
                # to lines 8-18.

                self.logger.debug(
                    "No further instance-seed pairs for incumbent available.")

                self.stage = IntensifierStage.RUN_CHALLENGER

        # Determine who the active challenger is.
        if self.stage == IntensifierStage.RUN_BASIS:
            # if in RUN_BASIS stage,
            # return the basis configuration (i.e., `always_race_against`)
            self.logger.debug(
                "Race against basis configuration after incumbent change.")
            challenger = self.always_race_against
        elif self.current_challenger and self.continue_challenger:
            # if the current challenger could not be rejected,
            # it is run again on more instances
            challenger = self.current_challenger
        else:
            # Get a new challenger if all instance-seed pairs have
            # been completed. Else return the currently running
            # challenger
            challenger, self.new_challenger = self.get_next_challenger(
                challengers=challengers,
                chooser=chooser,
            )

        # No new challengers are available for this iteration,
        # so move to the next iteration. This can only happen
        # when all configurations for this iteration are exhausted
        # and have been run on all proposed instance-seed pairs.
        if challenger is None:
            return RunInfoIntent.SKIP, RunInfo(
                config=None,
                instance=None,
                instance_specific="0",
                seed=0,
                cutoff=self.cutoff,
                capped=False,
                budget=0.0,
            )

        # Skip the iteration if the challenger was previously run
        if challenger == incumbent and self.stage == IntensifierStage.RUN_CHALLENGER:
            self.challenger_same_as_incumbent = True
            self.logger.debug(
                "Challenger was the same as the current incumbent; Skipping challenger"
            )
            return RunInfoIntent.SKIP, RunInfo(
                config=None,
                instance=None,
                instance_specific="0",
                seed=0,
                cutoff=self.cutoff,
                capped=False,
                budget=0.0,
            )

        self.logger.debug("Intensify on %s", challenger)
        if hasattr(challenger, 'origin'):
            self.logger.debug("Configuration origin: %s", challenger.origin)

        if self.stage in [
                IntensifierStage.RUN_CHALLENGER, IntensifierStage.RUN_BASIS
        ]:

            if not self.to_run:
                self.to_run, self.inc_sum_cost = self._get_instances_to_run(
                    incumbent=incumbent,
                    challenger=challenger,
                    run_history=run_history,
                    N=self.N)

            is_there_time_due_to_adaptive_cap = self._is_there_time_due_to_adaptive_cap(
                challenger=challenger,
                run_history=run_history,
            )

            # If there are no more configs to run in this iteration, or no more
            # time to do so, change the current stage based on how the current
            # challenger performs compared to the incumbent. This is done
            # via _process_racer_results
            if len(self.to_run) == 0 or not is_there_time_due_to_adaptive_cap:

                # If no more time, stage transition is a must
                if not is_there_time_due_to_adaptive_cap:
                    self.stage = IntensifierStage.RUN_INCUMBENT
                    self.logger.debug("Stop challenger itensification due "
                                      "to adaptive capping.")

                # Nevertheless, if there are no more instances to run,
                # we might need to comply with line 17 and keep running the
                # same challenger. In this case, if there is not enough information
                # to decide if the challenger is better/worse than the incumbent,
                # line 17 doubles the number of instances to run.
                self.logger.debug("No further runs for challenger possible")
                self._process_racer_results(
                    challenger=challenger,
                    incumbent=incumbent,
                    run_history=run_history,
                )

                # Request SMBO to skip this run. This function will
                # be called again, after the _process_racer_results
                # has updated the intensifier stage
                return RunInfoIntent.SKIP, RunInfo(
                    config=None,
                    instance=None,
                    instance_specific="0",
                    seed=0,
                    cutoff=self.cutoff,
                    capped=False,
                    budget=0.0,
                )

            else:
                # Lines 8-11
                incumbent, instance, seed, cutoff = self._get_next_racer(
                    challenger=challenger,
                    incumbent=incumbent,
                    run_history=run_history,
                )

                capped = False
                if (self.cutoff is not None) and (
                        cutoff <
                        self.cutoff):  # type: ignore[operator] # noqa F821
                    capped = True

                instance_specific = "0"
                if instance is not None:
                    instance_specific = self.instance_specifics.get(
                        instance, "0")

                # Line 12
                return RunInfoIntent.RUN, RunInfo(
                    config=challenger,
                    instance=instance,
                    instance_specific=instance_specific,
                    seed=seed,
                    cutoff=cutoff,
                    capped=capped,
                    budget=0.0,
                )
        else:
            raise ValueError('No valid stage found!')
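# --------------------------------------------------------------------------
# Supplementary sketch (not part of the source above): a standalone mock of
# the intent-driven contract of get_next_run() -- the caller either executes
# the returned run (RUN) or skips the iteration (SKIP) and asks again. The
# MockIntensifier below is purely illustrative and has no real stage logic.
import enum


class Intent(enum.Enum):
    RUN = 0
    SKIP = 1


class MockIntensifier:
    def __init__(self):
        self._queue = [
            (Intent.SKIP, None),
            (Intent.RUN, {"config": "incumbent", "instance": 1}),
            (Intent.RUN, {"config": "challenger", "instance": 1}),
        ]

    def get_next_run(self):
        return self._queue.pop(0) if self._queue else (Intent.SKIP, None)


intensifier = MockIntensifier()
for _ in range(3):
    intent, run_info = intensifier.get_next_run()
    if intent == Intent.SKIP:
        continue                  # nothing runnable right now; ask again later
    print("executing", run_info)  # a real caller would launch the target run here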
Example #23
0
    def _compare_configs(self,
                         incumbent: Configuration,
                         challenger: Configuration,
                         run_history: RunHistory,
                         log_traj: bool = True) -> typing.Optional[Configuration]:
        """
        Compare two configurations w.r.t. the runhistory and return the one
        which performs better (or None if the decision is not safe)

        Decision strategy to return x as being better than y:
            1. x has at least as many runs as y
            2. x performs better than y on the intersection of runs on x and y

        Implicit assumption:
            Challenger was evaluated on the same instance-seed pairs as
            incumbent

        Parameters
        ----------
        incumbent: Configuration
            Current incumbent
        challenger: Configuration
            Challenger configuration
        run_history: smac.runhistory.runhistory.RunHistory
            Stores all runs we ran so far
        log_traj: bool
            Whether to log changes of incumbents in trajectory

        Returns
        -------
        None or the better of the two configurations x, y
        """

        inc_runs = run_history.get_runs_for_config(incumbent, only_max_observed_budget=True)
        chall_runs = run_history.get_runs_for_config(challenger, only_max_observed_budget=True)
        to_compare_runs = set(inc_runs).intersection(chall_runs)

        # performance of challenger and incumbent on the intersection of runs
        chal_perf = run_history.average_cost(challenger, to_compare_runs)
        inc_perf = run_history.average_cost(incumbent, to_compare_runs)

        # Line 15
        if chal_perf > inc_perf and len(chall_runs) >= self.minR:
            # Incumbent beats challenger
            self.logger.debug("Incumbent (%.4f) is better than challenger "
                              "(%.4f) on %d runs." %
                              (inc_perf, chal_perf, len(chall_runs)))
            return incumbent

        # Line 16
        if not set(inc_runs) - set(chall_runs):

            # no plateau walks
            if chal_perf >= inc_perf:
                self.logger.debug("Incumbent (%.4f) is at least as good as the "
                                  "challenger (%.4f) on %d runs." %
                                  (inc_perf, chal_perf, len(chall_runs)))
                return incumbent

            # Challenger is better than incumbent
            # and has at least the same runs as inc
            # -> change incumbent
            n_samples = len(chall_runs)
            self.logger.info("Challenger (%.4f) is better than incumbent (%.4f)"
                             " on %d runs." % (chal_perf, inc_perf, n_samples))
            self._log_incumbent_changes(incumbent, challenger)

            if log_traj:
                self.stats.inc_changed += 1
                self.traj_logger.add_entry(train_perf=chal_perf,
                                           incumbent_id=self.stats.inc_changed,
                                           incumbent=challenger)
            return challenger

        # undecided
        return None
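# --------------------------------------------------------------------------
# Supplementary sketch (not part of the source above): the decision strategy
# of _compare_configs() reduced to plain numbers. minR and the run keys are
# toy values for illustration only.
minR = 2


def compare(inc_perf, chal_perf, inc_runs, chall_runs):
    # Line 15: incumbent wins if the challenger is worse after at least minR runs
    if chal_perf > inc_perf and len(chall_runs) >= minR:
        return "incumbent"
    # Line 16: challenger wins only once it covers all incumbent runs
    if not set(inc_runs) - set(chall_runs):
        return "incumbent" if chal_perf >= inc_perf else "challenger"
    return None  # undecided -> race on more instances


print(compare(1.0, 2.0, [(1, 0), (2, 0)], [(1, 0), (2, 0)]))  # incumbent
print(compare(2.0, 1.0, [(1, 0), (2, 0)], [(1, 0), (2, 0)]))  # challenger
print(compare(2.0, 1.0, [(1, 0), (2, 0)], [(1, 0)]))          # None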
Example #24
0
class TestIntensify(unittest.TestCase):
    def setUp(self):
        unittest.TestCase.setUp(self)

        self.rh = RunHistory()
        self.cs = get_config_space()
        self.config1 = Configuration(self.cs, values={'a': 0, 'b': 100})
        self.config2 = Configuration(self.cs, values={'a': 100, 'b': 0})
        self.config3 = Configuration(self.cs, values={'a': 100, 'b': 100})

        self.scen = Scenario({
            "cutoff_time": 2,
            'cs': self.cs,
            "run_obj": 'runtime',
            "output_dir": ''
        })
        self.stats = Stats(scenario=self.scen)
        self.stats.start_timing()

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

    def test_race_challenger(self):
        """
           test _race_challenger without adaptive capping
        """
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  run_obj_time=False)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)
        intensifier.N = 1

        inc = intensifier._race_challenger(challenger=self.config2,
                                           incumbent=self.config1,
                                           run_history=self.rh)

        self.assertEqual(inc, self.config2)

    def test_race_challenger_2(self):
        """
           test _race_challenger with adaptive capping
        """
        def target(x):
            time.sleep(1.5)
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats, run_obj="runtime")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=.001,
                    time=0.001,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=12345,
                    additional_info=None)
        intensifier.N = 1

        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        inc = intensifier._race_challenger(
            challenger=self.config2,
            incumbent=self.config1,
            run_history=self.rh,
        )

        self.assertEqual(inc, self.config1)

    def test_race_challenger_3(self):
        """
           test _race_challenger with adaptive capping on a previously capped configuration
        """
        def target(config: Configuration, seed: int, instance: str):
            if instance == 1:
                time.sleep(2.1)
            else:
                time.sleep(0.6)
            return (config['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="runtime",
                                par_factor=1)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  cutoff=2,
                                  instances=[1])

        self.rh.add(config=self.config1,
                    cost=0.5,
                    time=.5,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=12345,
                    additional_info=None)

        # config2 should have a timeout (due to adaptive capping)
        # and config1 should still be the incumbent
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config3], chooser=None)
        inc = intensifier._race_challenger(
            challenger=config,
            incumbent=self.config1,
            run_history=self.rh,
        )
        self.assertEqual(inc, self.config1)

        # further run for incumbent
        self.rh.add(config=self.config1,
                    cost=2,
                    time=2,
                    status=StatusType.TIMEOUT,
                    instance_id=2,
                    seed=12345,
                    additional_info=None)

        # give config2 a second chance - now it should run on both instances

        # run on instance 1
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config3], chooser=None)
        inc = intensifier._race_challenger(
            challenger=config,
            incumbent=self.config1,
            run_history=self.rh,
        )

        # run on instance 2
        config, _ = intensifier.get_next_challenger(challengers=[self.config3],
                                                    chooser=None)
        self.assertEqual(config, self.config2)
        self.assertTrue(intensifier.continue_challenger)

        inc = intensifier._race_challenger(
            challenger=config,
            incumbent=self.config1,
            run_history=self.rh,
        )

        # the incumbent should still be config1 because
        # config2 should get a full timeout on instance 1,
        # such that c(config1) = 1.25 and c(config2) is close to 1.3
        self.assertEqual(inc, self.config1)
        # the capped run should not be counted in num_runs_per_config
        self.assertAlmostEqual(self.rh.num_runs_per_config[2], 2)
        self.assertFalse(intensifier.continue_challenger)

    def test_race_challenger_large(self):
        """
           test _race_challenger using solution_quality
        """
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(10)),
                                  run_obj_time=False,
                                  deterministic=True)

        for i in range(10):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=12345,
                        additional_info=None)

        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # tie on the first instance and then the challenger should always win
        # and be returned as inc
        while True:
            config, _ = intensifier.get_next_challenger(
                challengers=[self.config2, self.config3], chooser=None)
            inc = intensifier._race_challenger(
                challenger=config,
                incumbent=self.config1,
                run_history=self.rh,
            )

            # stop when challenger evaluation is over
            if not intensifier.stage == IntensifierStage.RUN_CHALLENGER:
                break

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1)

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2,
                                           only_max_observed_budget=True)
        self.assertEqual(len(runs), 10)

    def test_race_challenger_large_blocked_seed(self):
        """
           test _race_challenger whether seeds are blocked for challenger runs
        """
        def target(x):
            return 1

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=list(range(10)),
                                  run_obj_time=False,
                                  deterministic=False)

        for i in range(10):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)

        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # tie on the first instance and then the challenger should always win
        # and be returned as inc
        while True:
            config, _ = intensifier.get_next_challenger(
                challengers=[self.config2, self.config3], chooser=None)
            inc = intensifier._race_challenger(
                challenger=config,
                incumbent=self.config1,
                run_history=self.rh,
            )

            # stop when challenger evaluation is over
            if not intensifier.stage == IntensifierStage.RUN_CHALLENGER:
                break

        self.assertEqual(inc, self.config2)
        self.assertEqual(self.rh.get_cost(self.config2), 1)

        # get data for config2 to check that the correct run was performed
        runs = self.rh.get_runs_for_config(self.config2,
                                           only_max_observed_budget=True)
        self.assertEqual(len(runs), 10)

        seeds = sorted([r.seed for r in runs])
        self.assertEqual(seeds, list(range(10)), seeds)

    def test_add_inc_run_det(self):
        """
            test _add_inc_run()
        """
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                run_obj="solution_quality")
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        # since we assume deterministic=True,
        # the second call should not add any more runs
        # given only one instance
        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

    def test_add_inc_run_nondet(self):
        """
            test _add_inc_run()
        """
        def target(x):
            return (x['a'] + 1) / 1000.

        taf = ExecuteTAFuncDict(ta=target,
                                stats=self.stats,
                                runhistory=self.rh,
                                run_obj="solution_quality")

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1, 2],
                                  deterministic=False)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 1, self.rh.data)

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 2, self.rh.data)
        runs = self.rh.get_runs_for_config(config=self.config1,
                                           only_max_observed_budget=True)
        # exactly one run on each instance
        self.assertIn(1, [runs[0].instance, runs[1].instance])
        self.assertIn(2, [runs[0].instance, runs[1].instance])

        intensifier._add_inc_run(incumbent=self.config1, run_history=self.rh)
        self.assertEqual(len(self.rh.data), 3, self.rh.data)

    def test_get_next_challenger(self):
        """
            test get_next_challenger()
        """
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)

        intensifier.stage = IntensifierStage.RUN_CHALLENGER

        # get a new challenger to evaluate
        config, new = intensifier.get_next_challenger(
            challengers=[self.config1, self.config2], chooser=None)

        self.assertEqual(config, self.config1, intensifier.current_challenger)
        self.assertEqual(intensifier._chall_indx, 1)
        self.assertEqual(intensifier.N, 1)
        self.assertTrue(new)

        # when already evaluating a challenger, return the same challenger
        intensifier.to_run = [(1, 1, 0)]
        config, new = intensifier.get_next_challenger(
            challengers=[self.config2], chooser=None)
        self.assertEqual(config, self.config1, intensifier.current_challenger)
        self.assertEqual(intensifier._chall_indx, 1)
        self.assertFalse(new)

    def test_generate_challenger(self):
        """
            test generate_challenger()
        """
        # test get generator from a list of challengers
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=None,
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)

        gen = intensifier._generate_challengers(
            challengers=[self.config1, self.config2], chooser=None)

        self.assertEqual(next(gen), self.config1)
        self.assertEqual(next(gen), self.config2)
        self.assertRaises(StopIteration, next, gen)

        # test get generator from a chooser - would return only 1 configuration
        intensifier = Intensifier(tae_runner=None,
                                  stats=self.stats,
                                  traj_logger=None,
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  deterministic=True)
        chooser = SMAC4AC(self.scen, rng=1).solver.epm_chooser

        gen = intensifier._generate_challengers(challengers=None,
                                                chooser=chooser)

        self.assertEqual(next(gen).get_dictionary(), {'a': 24, 'b': 68})
        self.assertRaises(StopIteration, next, gen)

        # when both are none, raise error
        with self.assertRaisesRegex(ValueError,
                                    "No configurations/chooser provided"):
            intensifier._generate_challengers(challengers=None, chooser=None)

    def test_eval_challenger(self):
        """
            test eval_challenger() - a complete intensification run
        """
        def target(x):
            return x['a']

        taf = ExecuteTAFuncDict(ta=target, stats=self.stats)
        taf.runhistory = self.rh

        intensifier = Intensifier(tae_runner=taf,
                                  stats=self.stats,
                                  traj_logger=TrajLogger(output_dir=None,
                                                         stats=self.stats),
                                  rng=np.random.RandomState(12345),
                                  instances=[1],
                                  run_obj_time=False,
                                  deterministic=False,
                                  always_race_against=self.config3,
                                  run_limit=1)

        # run incumbent first if it was not run before
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config1, self.config3],
            chooser=None)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=None,
            run_history=self.rh,
        )

        self.assertEqual(inc, self.config2)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_CHALLENGER)

        # run challenger now that the incumbent has been executed
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config1, self.config3],
            chooser=None)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )

        # challenger should have a better performance, so incumbent should have changed
        self.assertEqual(inc, self.config1)
        self.assertEqual(self.stats.inc_changed, 1)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_DEFAULT)
        self.assertFalse(intensifier.continue_challenger)

        # run `always_race_against` now since the incumbent has changed
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config1, self.config3],
            chooser=None)
        inc, _ = intensifier.eval_challenger(
            challenger=config,
            incumbent=inc,
            run_history=self.rh,
        )

        self.assertEqual(inc, self.config1)
        self.assertEqual(intensifier.stage, IntensifierStage.RUN_INCUMBENT)
        self.assertEqual(
            len(
                self.rh.get_runs_for_config(self.config3,
                                            only_max_observed_budget=True)), 1)
        self.assertEqual(intensifier.n_iters, 1)
        self.assertIsInstance(intensifier.configs_to_run, collections.Iterator)
        with self.assertRaises(StopIteration):
            next(intensifier.configs_to_run)
Example #25
0
class TestAbstractIntensifier(unittest.TestCase):
    def setUp(self):
        unittest.TestCase.setUp(self)

        self.rh = RunHistory()
        self.cs = get_config_space()
        self.config1 = Configuration(self.cs, values={'a': 0, 'b': 100})
        self.config2 = Configuration(self.cs, values={'a': 100, 'b': 0})
        self.config3 = Configuration(self.cs, values={'a': 100, 'b': 100})

        self.scen = Scenario({
            "cutoff_time": 2,
            'cs': self.cs,
            "run_obj": 'runtime',
            "output_dir": ''
        })
        self.stats = Stats(scenario=self.scen)
        self.stats.start_timing()

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

    def test_get_next_challenger(self):
        """
            test get_next_challenger - pick from list/chooser
        """
        intensifier = AbstractRacer(tae_runner=None,
                                    stats=self.stats,
                                    traj_logger=None,
                                    rng=np.random.RandomState(12345),
                                    deterministic=True,
                                    run_obj_time=False,
                                    cutoff=1,
                                    instances=[1])

        # Error when nothing to choose from
        with self.assertRaisesRegex(ValueError,
                                    "No configurations/chooser provided"):
            intensifier.get_next_challenger(challengers=None,
                                            chooser=None,
                                            run_history=self.rh)

        # next challenger from a list
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config1, self.config2],
            chooser=None,
            run_history=self.rh)
        self.assertEqual(config, self.config1)

        config, _ = intensifier.get_next_challenger(
            challengers=[self.config2, self.config3],
            chooser=None,
            run_history=self.rh)
        self.assertEqual(config, self.config2)

        # next challenger from a chooser
        intensifier = AbstractRacer(tae_runner=None,
                                    stats=self.stats,
                                    traj_logger=None,
                                    rng=np.random.RandomState(12345),
                                    deterministic=True,
                                    run_obj_time=False,
                                    cutoff=1,
                                    instances=[1])
        chooser = SMAC4AC(self.scen, rng=1).solver.epm_chooser

        config, _ = intensifier.get_next_challenger(challengers=None,
                                                    chooser=chooser,
                                                    run_history=self.rh)
        self.assertEqual(len(list(config.get_dictionary().values())), 2)
        self.assertTrue(24 in config.get_dictionary().values())
        self.assertTrue(68 in config.get_dictionary().values())

        config, _ = intensifier.get_next_challenger(challengers=None,
                                                    chooser=chooser,
                                                    run_history=self.rh)
        self.assertEqual(len(list(config.get_dictionary().values())), 2)
        self.assertTrue(95 in config.get_dictionary().values())
        self.assertTrue(38 in config.get_dictionary().values())

    def test_get_next_challenger_repeat(self):
        """
            test get_next_challenger - repeat configurations
        """
        intensifier = AbstractRacer(tae_runner=None,
                                    stats=self.stats,
                                    traj_logger=None,
                                    rng=np.random.RandomState(12345),
                                    deterministic=True,
                                    run_obj_time=False,
                                    cutoff=1,
                                    instances=[1])

        # should not repeat configurations
        self.rh.add(self.config1, 1, 1, StatusType.SUCCESS)
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config1, self.config2],
            chooser=None,
            run_history=self.rh,
            repeat_configs=False)

        self.assertEqual(config, self.config2)

        # should repeat configurations
        config, _ = intensifier.get_next_challenger(
            challengers=[self.config1, self.config2],
            chooser=None,
            run_history=self.rh,
            repeat_configs=True)

        self.assertEqual(config, self.config1)

    def test_compare_configs_no_joint_set(self):
        intensifier = AbstractRacer(tae_runner=None,
                                    stats=self.stats,
                                    traj_logger=TrajLogger(output_dir=None,
                                                           stats=self.stats),
                                    rng=None,
                                    instances=[1])

        for i in range(2):
            self.rh.add(config=self.config1,
                        cost=2,
                        time=2,
                        status=StatusType.SUCCESS,
                        instance_id=1,
                        seed=i,
                        additional_info=None)

        for i in range(2, 5):
            self.rh.add(config=self.config2,
                        cost=1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=1,
                        seed=i,
                        additional_info=None)

        # The sets for the incumbent are completely disjoint.
        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh)
        self.assertIsNone(conf)

        # The incumbent still has one instance-seed pair left on which the
        # challenger has not been run yet.
        self.rh.add(config=self.config2,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=1,
                    additional_info=None)
        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh)
        self.assertIsNone(conf)

    def test_compare_configs_chall(self):
        """
            challenger is better
        """
        intensifier = AbstractRacer(tae_runner=None,
                                    stats=self.stats,
                                    traj_logger=TrajLogger(output_dir=None,
                                                           stats=self.stats),
                                    rng=None,
                                    instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config2,
                    cost=0,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh)

        # challenger has enough runs and is better
        self.assertEqual(conf, self.config2, "conf: %s" % (conf))

    def test_compare_configs_inc(self):
        """
            incumbent is better
        """
        intensifier = AbstractRacer(tae_runner=None,
                                    stats=self.stats,
                                    traj_logger=TrajLogger(output_dir=None,
                                                           stats=self.stats),
                                    rng=None,
                                    instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config2,
                    cost=2,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh)

        # challenger worse than inc
        self.assertEqual(conf, self.config1, "conf: %s" % (conf))

    def test_compare_configs_unknow(self):
        """
            challenger is better but has fewer runs;
            -> no decision (None)
        """
        intensifier = AbstractRacer(tae_runner=None,
                                    stats=self.stats,
                                    traj_logger=TrajLogger(output_dir=None,
                                                           stats=self.stats),
                                    rng=None,
                                    instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=2,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=2,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh)

        # challenger has too few runs; no decision possible
        self.assertIsNone(conf, "conf: %s" % (conf))

    def test_adaptive_capping(self):
        """
            test _adapt_cutoff()
        """
        intensifier = AbstractRacer(tae_runner=None,
                                    stats=self.stats,
                                    traj_logger=TrajLogger(output_dir=None,
                                                           stats=self.stats),
                                    rng=np.random.RandomState(12345),
                                    instances=list(range(5)),
                                    deterministic=False)

        for i in range(5):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=i + 1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)
        for i in range(3):
            self.rh.add(config=self.config2,
                        cost=i + 1,
                        time=i + 1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)

        inst_seed_pairs = self.rh.get_runs_for_config(
            self.config1, only_max_observed_budget=True)
        # cost used by incumbent for going over all runs in inst_seed_pairs
        inc_sum_cost = self.rh.sum_cost(
            config=self.config1, instance_seed_budget_keys=inst_seed_pairs)

        cutoff = intensifier._adapt_cutoff(challenger=self.config2,
                                           run_history=self.rh,
                                           inc_sum_cost=inc_sum_cost)
        # inc_sum_cost = 1+2+3+4+5 = 15; challenger sum = 1+2+3 = 6 -> 15 * 1.2 - 6 = 12
        self.assertEqual(cutoff, 12)

        intensifier.cutoff = 5

        cutoff = intensifier._adapt_cutoff(challenger=self.config2,
                                           run_history=self.rh,
                                           inc_sum_cost=inc_sum_cost)
        # scenario cutoff
        self.assertEqual(cutoff, 5)
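# --------------------------------------------------------------------------
# Supplementary sketch (not part of the source above): the arithmetic behind
# the adaptive-capping cutoff checked in test_adaptive_capping. The slack
# factor of 1.2 matches the value implied by the test; everything else is
# taken from the test data.
def adapt_cutoff(scenario_cutoff, inc_sum_cost, chall_sum_cost, slack=1.2):
    # the challenger may spend at most what the incumbent spent (plus slack),
    # minus what it has already used, capped by the scenario cutoff
    capped = inc_sum_cost * slack - chall_sum_cost
    return capped if scenario_cutoff is None else min(scenario_cutoff, capped)


inc_sum_cost = sum([1, 2, 3, 4, 5])    # 15, incumbent cost over five runs
chall_sum_cost = sum([1, 2, 3])        # 6, challenger cost so far
print(adapt_cutoff(None, inc_sum_cost, chall_sum_cost))  # 12.0
print(adapt_cutoff(5, inc_sum_cost, chall_sum_cost))     # 5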
Example #26
0
class TestAbstractRacer(unittest.TestCase):
    def setUp(self):
        unittest.TestCase.setUp(self)

        self.rh = RunHistory()
        self.cs = get_config_space()
        self.config1 = Configuration(self.cs, values={'a': 0, 'b': 100})
        self.config2 = Configuration(self.cs, values={'a': 100, 'b': 0})
        self.config3 = Configuration(self.cs, values={'a': 100, 'b': 100})

        self.scen = Scenario({
            "cutoff_time": 2,
            'cs': self.cs,
            "run_obj": 'runtime',
            "output_dir": ''
        })
        self.stats = Stats(scenario=self.scen)
        self.stats.start_timing()

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)

    def test_compare_configs_no_joint_set(self):
        intensifier = AbstractRacer(stats=self.stats,
                                    traj_logger=TrajLogger(output_dir=None,
                                                           stats=self.stats),
                                    rng=None,
                                    instances=[1])

        for i in range(2):
            self.rh.add(config=self.config1,
                        cost=2,
                        time=2,
                        status=StatusType.SUCCESS,
                        instance_id=1,
                        seed=i,
                        additional_info=None)

        for i in range(2, 5):
            self.rh.add(config=self.config2,
                        cost=1,
                        time=1,
                        status=StatusType.SUCCESS,
                        instance_id=1,
                        seed=i,
                        additional_info=None)

        # The sets for the incumbent are completely disjoint.
        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh)
        self.assertIsNone(conf)

        # The incumbent still has one instance-seed pair left on which the
        # challenger has not been run yet.
        self.rh.add(config=self.config2,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=1,
                    additional_info=None)
        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh)
        self.assertIsNone(conf)

    def test_compare_configs_chall(self):
        """
            challenger is better
        """
        intensifier = AbstractRacer(stats=self.stats,
                                    traj_logger=TrajLogger(output_dir=None,
                                                           stats=self.stats),
                                    rng=None,
                                    instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config2,
                    cost=0,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh)

        # challenger has enough runs and is better
        self.assertEqual(conf, self.config2, "conf: %s" % (conf))

    def test_compare_configs_inc(self):
        """
            incumbent is better
        """
        intensifier = AbstractRacer(stats=self.stats,
                                    traj_logger=TrajLogger(output_dir=None,
                                                           stats=self.stats),
                                    rng=None,
                                    instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config2,
                    cost=2,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh)

        # challenger worse than inc
        self.assertEqual(conf, self.config1, "conf: %s" % (conf))

    def test_compare_configs_unknow(self):
        """
            challenger is better but has fewer runs;
            -> no decision (None)
        """
        intensifier = AbstractRacer(stats=self.stats,
                                    traj_logger=TrajLogger(output_dir=None,
                                                           stats=self.stats),
                                    rng=None,
                                    instances=[1])

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=1,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=2,
                    status=StatusType.SUCCESS,
                    instance_id=2,
                    seed=None,
                    additional_info=None)

        self.rh.add(config=self.config1,
                    cost=1,
                    time=1,
                    status=StatusType.SUCCESS,
                    instance_id=2,
                    seed=None,
                    additional_info=None)

        conf = intensifier._compare_configs(incumbent=self.config1,
                                            challenger=self.config2,
                                            run_history=self.rh)

        # no decision: the challenger is better but has fewer runs than the incumbent
        self.assertIsNone(conf, "conf: %s" % (conf))

    def test_adaptive_capping(self):
        """
            test _adapt_cutoff()
        """
        intensifier = AbstractRacer(stats=self.stats,
                                    traj_logger=TrajLogger(output_dir=None,
                                                           stats=self.stats),
                                    rng=np.random.RandomState(12345),
                                    instances=list(range(5)),
                                    deterministic=False)

        for i in range(5):
            self.rh.add(config=self.config1,
                        cost=i + 1,
                        time=i + 1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)
        for i in range(3):
            self.rh.add(config=self.config2,
                        cost=i + 1,
                        time=i + 1,
                        status=StatusType.SUCCESS,
                        instance_id=i,
                        seed=i,
                        additional_info=None)

        inst_seed_pairs = self.rh.get_runs_for_config(
            self.config1, only_max_observed_budget=True)
        # cost used by incumbent for going over all runs in inst_seed_pairs
        inc_sum_cost = self.rh.sum_cost(
            config=self.config1, instance_seed_budget_keys=inst_seed_pairs)

        cutoff = intensifier._adapt_cutoff(challenger=self.config2,
                                           run_history=self.rh,
                                           inc_sum_cost=inc_sum_cost)
        # inc_sum_cost * cutoff_factor - chall_sum_cost = 15 * 1.2 - 6 = 12
        self.assertEqual(cutoff, 12)

        intensifier.cutoff = 5

        cutoff = intensifier._adapt_cutoff(challenger=self.config2,
                                           run_history=self.rh,
                                           inc_sum_cost=inc_sum_cost)
        # capped at the scenario cutoff of 5
        self.assertEqual(cutoff, 5)
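The adaptive-capping arithmetic checked above is easy to verify in isolation. Below is a minimal standalone sketch, assuming the default cutoff multiplier of 1.2; adapt_cutoff is an illustrative stand-in, not the intensifier's _adapt_cutoff:

def adapt_cutoff(inc_sum_cost, chall_sum_cost, cutoff_factor=1.2, scenario_cutoff=None):
    """Cap the challenger's remaining time by what the incumbent needed, plus some slack."""
    cutoff = cutoff_factor * inc_sum_cost - chall_sum_cost
    if scenario_cutoff is not None:
        cutoff = min(cutoff, scenario_cutoff)
    return cutoff

inc_sum_cost = sum(range(1, 6))    # incumbent ran 5 instances with runtimes 1..5 -> 15
chall_sum_cost = sum(range(1, 4))  # challenger ran 3 instances with runtimes 1..3 -> 6
assert adapt_cutoff(inc_sum_cost, chall_sum_cost) == 12.0              # 1.2 * 15 - 6
assert adapt_cutoff(inc_sum_cost, chall_sum_cost, scenario_cutoff=5) == 5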
Example #27
0
    def __init__(self,
                 scenario: Scenario,
                 smac: Union[SMAC, None] = None,
                 mode: str = 'all',
                 X: Union[None, List[list], np.ndarray] = None,
                 y: Union[None, List[list], np.ndarray] = None,
                 numParams: int = -1,
                 impute: bool = False,
                 seed: int = 12345,
                 run: bool = False,
                 max_sample_size: int = -1,
                 fanova_cut_at_default: bool = False,
                 fANOVA_pairwise: bool = True,
                 forwardsel_feat_imp: bool = False,
                 incn_quant_var: bool = True,
                 marginalize_away_instances: bool = False,
                 save_folder: str = 'PIMP'):
        """
        Interface to be used with SMAC or with X and y matrices.
        :param scenario: The scenario object that knows the configuration space.
        :param smac: The smac object that keeps all the run-data
        :param mode: The mode with which to run PIMP [ablation, fanova, all, forward-selection]
        :param X: Numpy Array that contains parameter arrays
        :param y: Numpy array that contains the corresponding performance values
        :param numParams: The number of parameters to evaluate
        :param impute: Flag to decide if censored data gets imputed or not
        :param seed: The random seed
        :param run: Flag to immediately compute the importance values after this setup.
        """
        self.scenario = scenario
        self.imp = None
        self.mode = mode
        self.save_folder = save_folder
        if not os.path.exists(self.save_folder):
            os.mkdir(self.save_folder)
        if smac is not None:
            self.imp = Importance(scenario=scenario,
                                  runhistory=smac.runhistory,
                                  incumbent=smac.solver.incumbent,
                                  seed=seed,
                                  parameters_to_evaluate=numParams,
                                  save_folder=self.save_folder,
                                  impute_censored=impute,
                                  max_sample_size=max_sample_size,
                                  fANOVA_cut_at_default=fanova_cut_at_default,
                                  fANOVA_pairwise=fANOVA_pairwise,
                                  forwardsel_feat_imp=forwardsel_feat_imp,
                                  incn_quant_var=incn_quant_var,
                                  preprocess=marginalize_away_instances)
        elif X is not None and y is not None:
            X = np.array(X)
            y = np.array(y)
            runHist = RunHistory(average_cost)
            if X.shape[0] != y.shape[0]:
                raise Exception('Number of samples in X and y do not match!')
            n_params = len(scenario.cs.get_hyperparameters())
            feats = None
            if X.shape[1] > n_params:
                feats = X[:, n_params:]
                assert feats.shape[1] == scenario.feature_array.shape[1]
                X = X[:, :n_params]

            for p in range(X.shape[1]):  # Normalize the data to fit into [0, 1]
                _min, _max = np.min(X[:, p]), np.max(X[:, p])
                if _min < 0. or 1 < _max:  # if it is not already normalized
                    for idx, v in enumerate(X[:, p]):
                        X[idx, p] = (v - _min) / (_max - _min)

            # Add everything to a runhistory such that PIMP can work with it
            for x, feat, y_val in zip(X, feats if feats is not None else X, y):
                inst_id = None
                for inst in scenario.feature_dict:  # determine on which instance a configuration was run
                    if np.all(scenario.feature_dict[inst] == feat):
                        inst_id = inst
                        break
                runHist.add(Configuration(scenario.cs, vector=x), y_val, 0, StatusType.SUCCESS, inst_id)
            self.X = X
            self.y = y

            best_ = None  # Determine incumbent according to the best mean cost in the runhistory
            for config in runHist.config_ids:
                inst_seed_pairs = runHist.get_runs_for_config(config)
                all_ = []
                for inst, seed in inst_seed_pairs:
                    rk = RunKey(runHist.config_ids[config], inst, seed)
                    all_.append(runHist.data[rk].cost)
                mean = np.mean(all_)
                if best_ is None or best_[0] > mean:
                    best_ = (mean, config)
            incumbent = best_[1]
            self.imp = Importance(scenario=scenario,
                                  runhistory=runHist,
                                  seed=seed,
                                  parameters_to_evaluate=numParams,
                                  save_folder=self.save_folder,
                                  impute_censored=impute,
                                  incumbent=incumbent,
                                  fANOVA_cut_at_default=fanova_cut_at_default,
                                  fANOVA_pairwise=fANOVA_pairwise,
                                  forwardsel_feat_imp=forwardsel_feat_imp,
                                  incn_quant_var=incn_quant_var,
                                  preprocess=marginalize_away_instances
                                  )
        else:
            raise Exception('Neither X and y matrices nor a SMAC object were specified to compute the importance '
                            'values from!')

        if run:
            self.compute_importances()
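The X/y branch above does two things that are easy to check in isolation: column-wise min-max normalization into [0, 1] and picking the incumbent as the configuration with the lowest (mean) cost. A small sketch of both steps with plain numpy and made-up data, with no RunHistory or Configuration objects involved:

import numpy as np

# Toy parameter matrix and costs standing in for X and y above (not real SMAC data).
X = np.array([[3.0, 10.0],
              [1.0, 30.0],
              [2.0, 20.0]])
y = np.array([0.4, 0.9, 0.1])

# Column-wise min-max normalization into [0, 1], mirroring the per-parameter loop above.
for p in range(X.shape[1]):
    _min, _max = X[:, p].min(), X[:, p].max()
    if _min < 0.0 or _max > 1.0:  # only rescale columns that are not already in [0, 1]
        X[:, p] = (X[:, p] - _min) / (_max - _min)

assert X.min() >= 0.0 and X.max() <= 1.0

# Incumbent selection: with one run per configuration, the row with the lowest cost wins;
# with repeated configurations the constructor averages their costs first.
incumbent_idx = int(np.argmin(y))
print(incumbent_idx)  # -> 2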
Example #28
0
    def _process_racer_results(
        self,
        challenger: Configuration,
        incumbent: Configuration,
        run_history: RunHistory,
        log_traj: bool = True,
    ) -> typing.Optional[Configuration]:
        """Process the result of a racing configuration against the
        current incumbent. Might propose a new incumbent.

        Parameters
        ----------
        challenger : Configuration
            Configuration which challenges incumbent
        incumbent : Configuration
            Best configuration so far
        run_history : RunHistory
            Stores all runs we ran so far
        log_traj : bool
            Whether to log changes of incumbents to the trajectory

        Returns
        -------
        new_incumbent: typing.Optional[Configuration]
            Either challenger or incumbent
        """
        chal_runs = run_history.get_runs_for_config(
            challenger, only_max_observed_budget=True)
        chal_perf = run_history.get_cost(challenger)
        # if all <instance, seed> have been run, compare challenger performance
        if not self.to_run:
            new_incumbent = self._compare_configs(incumbent=incumbent,
                                                  challenger=challenger,
                                                  run_history=run_history,
                                                  log_traj=log_traj)

            # update intensification stage
            if new_incumbent == incumbent:
                # move on to the next iteration
                self.stage = IntensifierStage.RUN_INCUMBENT
                self.continue_challenger = False
                self.logger.debug(
                    'Estimated cost of challenger on %d runs: %.4f, but worse than incumbent',
                    len(chal_runs), chal_perf)

            elif new_incumbent == challenger:
                # New incumbent found
                incumbent = challenger
                self.continue_challenger = False
                # compare against basis configuration if provided, else go to next iteration
                if self.always_race_against and \
                        self.always_race_against != challenger:
                    self.stage = IntensifierStage.RUN_BASIS
                else:
                    self.stage = IntensifierStage.RUN_INCUMBENT
                self.logger.debug(
                    'Estimated cost of challenger on %d runs: %.4f, becomes new incumbent',
                    len(chal_runs), chal_perf)

            else:  # Line 17
                # challenger is not worse, continue
                self.N = 2 * self.N
                self.continue_challenger = True
                self.logger.debug(
                    'Estimated cost of challenger on %d runs: %.4f, adding %d runs to the queue',
                    len(chal_runs), chal_perf, self.N / 2)
        else:
            self.logger.debug(
                'Estimated cost of challenger on %d runs: %.4f, still %d runs to go (continue racing)',
                len(chal_runs), chal_perf, len(self.to_run))

        return incumbent
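When the comparison above is inconclusive, the racer doubles N and keeps going, so a surviving challenger is granted a geometrically growing number of <instance, seed> pairs until it has covered the incumbent's runs. A toy sketch of that schedule; race_schedule is illustrative and not an attribute of the intensifier:

def race_schedule(total_pairs, start_n=1):
    """Yield how many <instance, seed> pairs a surviving challenger is given per racing round."""
    n, scheduled = start_n, 0
    while scheduled < total_pairs:
        batch = min(n, total_pairs - scheduled)
        scheduled += batch
        yield batch
        n *= 2  # mirrors the 'challenger is not worse' branch above: N = 2 * N

print(list(race_schedule(10)))  # -> [1, 2, 4, 3]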
Example #29
0
    def __init__(
        self,
        original_rh: RunHistory,
        validated_rh: RunHistory,
        validator: Validator,
        scenario: Scenario,
        default: Configuration,
        incumbent: Configuration,
        param_imp: Union[None, Dict[str, float]],
        params: Union[int, List[str]],
        n_configs: int,
        pc_sort_by: str,
        output_dir: str,
        cs: ConfigurationSpace,
        runtime: bool = False,
        max_runs_epm: int = 3000000,
    ):
        """This function prepares the data from a SMAC-related
        format (using runhistories and parameters) to a more general format
        (using a dataframe). The resulting dataframe is passed to the
        parallel_coordinates-routine

        Parameters
        ----------
        original_rh: RunHistory
            runhistory that should contain only runs that were executed during search
        validated_rh: RunHistory
            runhistory that may contain as many runs as possible, including external runs;
            this runhistory will be used to build the EPM
        validator: Validator
            validator to be used to estimate costs for configurations
        scenario: Scenario
            scenario object to take instances from
        default, incumbent: Configuration
            default and incumbent configuration; these will always be displayed
        param_imp: Union[None, Dict[str, float]]
            if given, maps parameter-names to importance
        params: Union[int, List[str]]
            either directly the parameters to be displayed or the number of parameters (in which
            case the most important ones are chosen)
        n_configs: int
            number of configs to be plotted
        pc_sort_by: str
            defines the pimp-method by which to choose the plotted parameters
        max_runs_epm: int
            maximum number of runs to train the epm with; this should prevent MemoryErrors
        output_dir: str
            output directory for plots
        cs: ConfigurationSpace
            parameter configuration space to be visualized
        runtime: boolean
            runtime will be on logscale
        """

        self.logger = logging.getLogger(self.__module__ + "." +
                                        self.__class__.__name__)
        self.error = None

        self.default = default
        self.param_imp = param_imp
        self.cs = cs

        # Sorting by importance, if possible (choose first executed parameter-importance)
        self.method, self.importance = "", {}
        if pc_sort_by == 'all':
            self.logger.debug("Sorting by average importance")
            self.method = 'average'
            for m, i in self.param_imp.items():
                if i:
                    for p, imp in i.items():
                        if p in self.importance:
                            self.importance[p].append(imp)
                        else:
                            self.importance[p] = [imp]
            self.importance = {
                k: sum(v) / len(v)
                for k, v in self.importance.items()
            }
        elif pc_sort_by in self.param_imp:
            self.method, self.importance = pc_sort_by, self.param_imp[
                pc_sort_by]
        else:
            self.logger.debug("%s not evaluated.. choosing at random from: %s",
                              pc_sort_by, str(list(self.param_imp.keys())))
            for m, i in self.param_imp.items():
                if i:
                    self.method, self.importance = m, i
                    break

        self.hp_names = sorted(self.cs.get_hyperparameter_names(),
                               key=lambda x: self.importance.get(x, 0),
                               reverse=True)
        self.logger.debug("Sorted hp's by method \'%s\': %s", self.method,
                          str(self.hp_names))

        # To be set
        self.plots = []

        # Define set of configurations (limiting to max and choosing most interesting ones)
        all_configs = original_rh.get_all_configs()
        # max_runs_epm caps the total number of runs considered for the EPM and thereby the number of configs
        max_configs = int(
            max_runs_epm /
            (len(scenario.train_insts) + len(scenario.test_insts)))
        if len(all_configs) > max_configs:
            self.logger.debug(
                "Limiting number of configs to train epm from %d to %d (based on max runs %d) and choosing "
                "the ones with the most runs (for parallel coordinates)",
                len(all_configs), max_configs, max_runs_epm)
            all_configs = sorted(
                all_configs,
                key=lambda c: len(original_rh.get_runs_for_config(c)),
                reverse=True)[:max_configs]
            if default not in all_configs:
                all_configs = [default] + all_configs
            if incumbent not in all_configs:
                all_configs.append(incumbent)

        # Get costs for those configurations
        epm_rh = RunHistory(average_cost)
        epm_rh.update(validated_rh)
        if scenario.feature_dict:  # if instances are available
            epm_rh.update(
                timing(validator.validate_epm)(all_configs,
                                               'train+test',
                                               1,
                                               runhistory=validated_rh))
        self.config_to_cost = {c: epm_rh.get_cost(c) for c in all_configs}

        self.params = self.get_params(params)
        self.n_configs = n_configs

        self.pcp = ParallelCoordinatesPlotter(self.config_to_cost, output_dir,
                                              cs, runtime)
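The limiting step above (cap the total number of EPM training runs, keep the configurations with the most runs per the debug message, and always retain default and incumbent) can be sketched without any SMAC objects; the run-count dictionary below is a made-up stand-in for original_rh:

def select_configs_for_epm(run_counts, default, incumbent, max_runs_epm, n_instances):
    """run_counts maps a config name to its number of runs; a stand-in for the runhistory."""
    max_configs = int(max_runs_epm / n_instances)
    configs = list(run_counts)
    if len(configs) > max_configs:
        # keep the configurations with the most runs, then make sure that
        # default and incumbent are present, as the limiting step above intends
        configs = sorted(configs, key=lambda c: run_counts[c], reverse=True)[:max_configs]
        if default not in configs:
            configs = [default] + configs
        if incumbent not in configs:
            configs.append(incumbent)
    return configs

counts = {'c1': 30, 'c2': 5, 'c3': 1, 'default': 2, 'incumbent': 40}
print(select_configs_for_epm(counts, 'default', 'incumbent', max_runs_epm=2, n_instances=1))
# -> ['default', 'incumbent', 'c1'] (default is re-added even though it had few runs)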
Example #30
0
    def _plot_parallel_coordinates(
        self,
        original_rh: RunHistory,
        validated_rh: RunHistory,
        validator: Validator,
        scenario: Scenario,
        default: Configuration,
        incumbent: Configuration,
        param_imp: Union[None, Dict[str, float]],
        output_dir: str,
        cs: ConfigurationSpace,
        runtime: bool = False,
    ):
        """
        Parameters:
        -----------
        original_rh: RunHistory
            runhistory that should contain only runs that were executed during search
        validated_rh: RunHistory
            runhistory that may contain as many runs as possible, including external runs;
            this runhistory will be used to build the EPM
        validator: Validator
            validator to be used to estimate costs for configurations
        scenario: Scenario
            scenario object to take instances from
        default, incumbent: Configuration
            default and incumbent configuration; these will always be displayed
        param_imp: Union[None, Dict[str, float]]
            if given, maps parameter-names to importance
        output_dir: str
            output directory for plots
        cs: ConfigurationSpace
            parameter configuration space to be visualized
        runtime: boolean
            runtime will be on logscale
        """
        # Sorting parameters by importance, if possible (choose first executed parameter-importance)
        method, importance = "", {}
        if self.pc_sort_by == 'all':
            self.logger.debug("Sorting by average importance")
            method = 'average'
            for m, i in param_imp.items():
                if i:
                    for p, imp in i.items():
                        if p in importance:
                            importance[p].append(imp)
                        else:
                            importance[p] = [imp]
            importance = {k: sum(v) / len(v) for k, v in importance.items()}
        elif self.pc_sort_by in param_imp:
            method, importance = self.pc_sort_by, param_imp[self.pc_sort_by]
        else:
            self.logger.debug("%s not evaluated.. choosing at random from: %s",
                              self.pc_sort_by, str(list(param_imp.keys())))
            for m, i in param_imp.items():
                if i:
                    method, importance = m, i
                    self.logger.debug("Chose %s", method)
                    break

        hp_names = sorted(cs.get_hyperparameter_names(),
                          key=lambda x: importance.get(x, 0),
                          reverse=True)
        self.logger.debug("Sorted hp's by method \'%s\': %s", method,
                          str(hp_names))

        # To be set
        self.plots = []

        # Define set of configurations (limiting to max and choosing most interesting ones)
        all_configs = original_rh.get_all_configs()
        # self.max_runs_epm caps the total number of runs considered for the EPM and thereby the number of configs
        max_runs_epm = self.max_runs_epm
        max_configs = int(
            max_runs_epm /
            (len(scenario.train_insts) + len(scenario.test_insts)))
        if len(all_configs) > max_configs:
            self.logger.debug(
                "Limiting number of configs to train epm from %d to %d (based on max runs %d) and choosing "
                "the ones with the most runs (for parallel coordinates)",
                len(all_configs), max_configs, max_runs_epm)
            all_configs = sorted(
                all_configs,
                key=lambda c: len(original_rh.get_runs_for_config(c)),
                reverse=True)[:max_configs]
            if default not in all_configs:
                all_configs = [default] + all_configs
            if incumbent not in all_configs:
                all_configs.append(incumbent)

        # Get costs for those configurations
        epm_rh = RunHistory(average_cost)
        epm_rh.update(validated_rh)
        if scenario.feature_dict:  # if instances are available
            epm_rh.update(
                timing(validator.validate_epm)(all_configs,
                                               'train+test',
                                               1,
                                               runhistory=validated_rh))
        config_to_cost = {c: epm_rh.get_cost(c) for c in all_configs}

        pcp = ParallelCoordinatesPlotter(config_to_cost, output_dir, cs,
                                         runtime)

        try:
            plots = [
                pcp.plot_n_configs(
                    self.n_configs,
                    self.get_params(self.params, importance, hp_names))
            ]
            self.logger.debug("Paths to plot(s): %s", str(plots))
            return {'figure': plots}
        except ValueError as err:
            self.logger.debug("Error: %s", str(err))
            return {'else': str(err)}
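The 'all' branch in both variants averages each parameter's importance over every evaluator that returned values and then sorts the hyperparameters by that average. A compact sketch of just this aggregation, with fabricated importance dictionaries in place of param_imp:

# Fabricated importance dictionaries standing in for param_imp; keys are evaluator names.
param_imp = {
    'fanova': {'lr': 0.6, 'depth': 0.2},
    'ablation': {'lr': 0.4, 'depth': 0.4, 'momentum': 0.1},
    'forward-selection': None,  # evaluators without results are skipped
}

importance = {}
for method, values in param_imp.items():
    if values:
        for param, imp in values.items():
            importance.setdefault(param, []).append(imp)
importance = {k: sum(v) / len(v) for k, v in importance.items()}

hp_names = sorted(importance, key=lambda p: importance[p], reverse=True)
print(hp_names)  # -> ['lr', 'depth', 'momentum'] (averages 0.5, ~0.3, 0.1)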