Example #1
def _cost(config, run_history, instance_seed_pairs=None):
    """Return array of all costs for the given config for further calculations.

    Parameters
    ----------
    config : Configuration
        configuration to calculate objective for
    run_history : RunHistory
        RunHistory object from which the objective value is computed.
    instance_seed_pairs : list, optional (default=None)
        list of tuples of instance-seed pairs. If None, the run_history is
        queried for all runs of the given configuration.

    Returns
    -------
    list
    """
    try:
        id_ = run_history.config_ids[config]
    except KeyError:  # challenger has not been run so far
        return []

    if instance_seed_pairs is None:
        instance_seed_pairs = run_history.get_runs_for_config(config)

    costs = []
    for i, r in instance_seed_pairs:
        k = RunKey(id_, i, r)
        costs.append(run_history.data[k].cost)
    return costs
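For context, a minimal, self-contained sketch of how the list returned by _cost is typically consumed. RunHistory, RunKey and the run data are mocked with namedtuples and plain dicts (all names below are illustrative stand-ins, not SMAC's real classes), so the snippet runs without SMAC installed:

import numpy as np
from collections import namedtuple

# Hypothetical stand-ins for SMAC's RunKey/RunValue records
RunKey = namedtuple('RunKey', ['config_id', 'instance_id', 'seed'])
RunValue = namedtuple('RunValue', ['cost'])

class MockRunHistory:
    """Just enough of the RunHistory interface for _cost to work."""
    def __init__(self):
        self.config_ids = {'config-a': 1}
        self.data = {
            RunKey(1, 'inst-1', 0): RunValue(cost=0.4),
            RunKey(1, 'inst-1', 1): RunValue(cost=0.6),
            RunKey(1, 'inst-2', 0): RunValue(cost=1.0),
        }

    def get_runs_for_config(self, config):
        id_ = self.config_ids[config]
        return [(k.instance_id, k.seed) for k in self.data if k.config_id == id_]

rh = MockRunHistory()
costs = _cost('config-a', rh)  # -> [0.4, 0.6, 1.0]
print(np.mean(costs))          # average cost over all runs, here ~0.667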
Example #2
        def get_instance_costs_for_config(runhis: RunHistory,
                                          config: Configuration):
            """
            Returns the average cost per instance (across seeds)
                for a configuration
                Parameters
                ----------
                config : Configuration from ConfigSpace
                    Parameter configuration

                Returns
                -------
                cost_per_inst: dict<instance name<str>, cost<float>>
            """
            config_id = runhis.config_ids.get(config)
            runs_ = runhis._configid_to_inst_seed.get(config_id, [])
            cost_per_inst = {}
            for inst, seed in runs_:
                rkey = RunKey(config_id, inst, seed)
                cost_per_inst.setdefault(inst, []).append(runhis.data[rkey].cost)
            cost_per_inst = {inst: np.mean(costs)
                             for inst, costs in cost_per_inst.items()}
            return cost_per_inst
Example #3
File: helpers.py  Project: JuAbels/CAVE
def get_cost_dict_for_config(rh: RunHistory,
                             conf: Configuration,
                             par: int = 1,
                             cutoff: typing.Union[float, None] = None):
    """
    Aggregates loss for configuration on evaluated instances over seeds.

    Parameters
    ----------
    rh: RunHistory
        runhistory with data
    conf: Configuration
        configuration to evaluate
    par: int
        par-factor with which to multiply timeouts
    cutoff: float
        cutoff of scenario - used to penalize costs if par != 1

    Returns
    -------
    cost: dict(instance->cost)
        mean cost per instance, aggregated over seeds (penalized if par != 1)
    """
    # Check if config is in runhistory
    conf_id = rh.config_ids[conf]

    # Map instances to seeds in dict
    runs = rh.get_runs_for_config(conf)
    instance_to_seeds = dict()
    for run in runs:
        inst, seed = run
        if inst in instance_to_seeds:
            instance_to_seeds[inst].append(seed)
        else:
            instance_to_seeds[inst] = [seed]

    # Get loss per instance
    instance_costs = {
        i: [rh.data[RunKey(conf_id, i, s)].cost for s in instance_to_seeds[i]]
        for i in instance_to_seeds
    }

    # Aggregate:
    instance_costs = {i: np.mean(instance_costs[i]) for i in instance_costs}

    # TODO: uncomment next line and delete all above after next SMAC dev->master
    # instance_costs = rh.get_instance_costs_for_config(conf)

    if par != 1:
        if cutoff:
            instance_costs = {
                k: v if v < cutoff else v * par
                for k, v in instance_costs.items()
            }
        else:
            raise ValueError(
                "To apply penalization of costs, a cutoff needs to be provided."
            )

    return instance_costs
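For intuition, the PAR ("penalized average runtime") adjustment above simply multiplies any cost at or above the cutoff by the par factor; a toy illustration with made-up values:

cutoff, par = 5.0, 10  # PAR10: runs that hit the cutoff are penalized tenfold
instance_costs = {'inst-1': 3.2, 'inst-2': 5.0}  # inst-2 ran into the cutoff
penalized = {k: v if v < cutoff else v * par
             for k, v in instance_costs.items()}
print(penalized)  # {'inst-1': 3.2, 'inst-2': 50.0}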
Example #4
def ensemble_run_history(request):

    run_history = RunHistory()
    run_history._add(
        RunKey(config_id=3,
               instance_id='{"task_id": "breast_cancer"}',
               seed=1,
               budget=3.0),
        RunValue(cost=0.11347517730496459,
                 time=0.21858787536621094,
                 status=None,
                 starttime=time.time(),
                 endtime=time.time(),
                 additional_info={
                     'duration': 0.20323538780212402,
                     'num_run': 3,
                     'configuration_origin': 'Random Search'
                 }),
        status=None,
        origin=None,
    )
    run_history._add(
        RunKey(config_id=6,
               instance_id='{"task_id": "breast_cancer"}',
               seed=1,
               budget=6.0),
        RunValue(cost=2 * 0.11347517730496459,
                 time=2 * 0.21858787536621094,
                 status=None,
                 starttime=time.time(),
                 endtime=time.time(),
                 additional_info={
                     'duration': 0.20323538780212402,
                     'num_run': 6,
                     'configuration_origin': 'Random Search'
                 }),
        status=None,
        origin=None,
    )
    return run_history
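The request argument suggests this is the body of a pytest fixture. A hedged sketch of how such a fixture is usually declared and consumed; the decorator and the test below are assumptions for illustration, and a plain dict stands in for the RunHistory so the sketch runs under pytest without SMAC:

import pytest

@pytest.fixture
def ensemble_run_history(request):
    # build and return the RunHistory as in the example above;
    # a plain dict stands in here
    return {'runs': 2}

def test_consumes_fixture(ensemble_run_history):
    # pytest injects the fixture's return value by parameter name
    assert ensemble_run_history['runs'] == 2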
Example #5
File: helpers.py  Project: shunsunsun/CAVE
def get_cost_dict_for_config(rh, conf, aggregate=np.mean):
    """
    Aggregates loss for configuration on evaluated instances over seeds.

    Parameters
    ----------
    rh: RunHistory
        runhistory with data
    conf: Configuration
        configuration to evaluate
    aggregate: function or None
        used to aggregate loss over different seeds; must accept a list as
        argument. If None, no aggregation happens (the per-seed loss lists
        are returned, without the seeds themselves).

    Returns
    -------
    loss: dict(instance->loss)
        loss per instance (aggregated, or as a list per seed if aggregate
        is None)
    """
    # Check if config is in runhistory
    conf_id = rh.config_ids[conf]

    # Map instances to seeds in dict
    runs = rh.get_runs_for_config(conf)
    instance_to_seeds = dict()
    for run in runs:
        inst, seed = run
        if inst in instance_to_seeds:
            instance_to_seeds[inst].append(seed)
        else:
            instance_to_seeds[inst] = [seed]

    # Get loss per instance
    instance_losses = {
        i: [rh.data[RunKey(conf_id, i, s)].cost for s in instance_to_seeds[i]]
        for i in instance_to_seeds
    }

    # Aggregate:
    if aggregate:
        instance_losses = {
            i: aggregate(instance_losses[i])
            for i in instance_losses
        }

    return instance_losses
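A toy illustration of the aggregate switch, assuming the per-instance loss lists have already been collected (values made up): with np.mean each instance maps to a scalar, with None the per-seed lists pass through unchanged:

import numpy as np

instance_losses = {'inst-1': [0.4, 0.6], 'inst-2': [1.0]}

aggregate = np.mean
if aggregate:
    instance_losses = {i: aggregate(instance_losses[i])
                       for i in instance_losses}
print(instance_losses)  # {'inst-1': 0.5, 'inst-2': 1.0}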
Example #6
def get_instance_costs_for_config(runhis: RunHistory, config: Configuration):
    """
    return average cost per instance
    :param runhis: SMAC run history
    :param config: parameter configuration
    :return: mapping from instance name to cost
    """
    config_id = runhis.config_ids.get(config)
    runs_ = runhis._configid_to_inst_seed.get(config_id, [])
    cost_per_inst = {}
    for inst, seed in runs_:
        rkey = RunKey(config_id, inst, seed)
        cost_per_inst.setdefault(inst, []).append(runhis.data[rkey].cost)
    cost_per_inst = {inst: np.mean(costs)
                     for inst, costs in cost_per_inst.items()}
    return cost_per_inst
Example #7
def make_dict_run_history_data(data):
    run_history_data = dict()
    for row in data:
        run_key = RunKey(
            config_id=row[0][0],
            instance_id=row[0][1],
            seed=row[0][2],
            budget=row[0][3])

        run_value = RunValue(
            cost=row[1][0],
            time=row[1][1],
            status=getattr(StatusType, row[1][2]['__enum__'].split(".")[-1]),
            starttime=row[1][3],
            endtime=row[1][4],
            additional_info=row[1][5],
        )
        run_history_data[run_key] = run_value
    return run_history_data
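A sketch of the row layout this loader assumes: row[0] carries the RunKey quadruple and row[1] the RunValue fields, with the status serialized under an '__enum__' marker. The namedtuples and the mock StatusType below are illustrative stand-ins so the call runs without SMAC:

from enum import Enum
from collections import namedtuple

RunKey = namedtuple('RunKey', ['config_id', 'instance_id', 'seed', 'budget'])
RunValue = namedtuple('RunValue', ['cost', 'time', 'status', 'starttime',
                                   'endtime', 'additional_info'])

class StatusType(Enum):  # mock of SMAC's StatusType
    SUCCESS = 1

data = [
    [[1, 'inst-1', 42, 0.0],
     [0.25, 1.7, {'__enum__': 'StatusType.SUCCESS'}, 0.0, 1.7, {}]],
]
print(make_dict_run_history_data(data))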
Example #8
def get_timeout(rh, conf, cutoff):
    """Check for timeouts. If multiple runs for an inst/config-pair are
    available, using the median (not the mean: no fractional timeouts)

    Parameters
    ----------
    rh: RunHistory
        runhistory to take runs from
    conf: Configuration
        config to use
    cutoff: int
        to determine timeouts

    Returns
    -------
    timeouts: Dict(str: bool)
        mapping instances to [True, False], where True indicates that the
        runs finished within the cutoff (i.e., no timeout)
    """
    # TODO Possibly inconsistent: median over timeouts is timeout, but mean over
    # costs is not. Possible?
    if not cutoff:
        return {}
    # Check if config is in runhistory
    conf_id = rh.config_ids[conf]

    timeouts = {}
    runs = rh.get_runs_for_config(conf, only_max_observed_budget=True)
    for run in runs:
        # Averaging over seeds; run = (inst, seed, budget)
        inst, seed, _budget = run
        status = rh.data[RunKey(conf_id, inst, seed)].time < cutoff
        if inst in timeouts:
            timeouts[inst].append(status)
        else:
            timeouts[inst] = [status]
    # Use median
    timeouts = {i: np.floor(np.median(timeouts[i])) for i in timeouts.keys()}
    return timeouts
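The floor-of-median trick reduces the per-seed booleans to a single 0/1 per instance, with ties rounded down; a minimal numeric illustration:

import numpy as np

print(np.floor(np.median([True, True, False])))  # 1.0: majority finished in time
print(np.floor(np.median([True, False])))        # 0.0: a tie is floored to 0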
Example #9
    def _get_mean_var_time(self, validator, traj, use_epm, rh):
        """
        Parameters
        ----------
        validator: Validator
            validator (smac-based)
        traj: List[Configuraton]
            trajectory to set in validator
        use_epm: bool
            validated or not (no need to use epm if validated)
        rh: RunHistory
            ??

        Returns
        -------
        mean, var

        times: List[float]
            times to plot (x-values)
        configs

        """
        # TODO kinda important: docstrings, what is this function doing?
        if validator:
            validator.traj = traj  # set trajectory
        time, configs = [], []

        if use_epm and not self.block_epm:
            for entry in traj:
                time.append(entry["wallclock_time"])
                configs.append(entry["incumbent"])
                # self.logger.debug('Time: %d Runs: %d', time[-1], len(rh.get_runs_for_config(configs[-1])))

            self.logger.debug(
                "Using %d samples (%d distinct) from trajectory.", len(time),
                len(set(configs)))

            # Initialize EPM
            if validator.epm:  # not log as validator epm is trained on cost, not log cost
                epm = validator.epm
            else:
                self.logger.debug(
                    "No EPM passed! Training new one from runhistory.")
                # Train random forest and transform training data (from given rh)
                # Not using validator because we want to plot uncertainties
                rh2epm = RunHistory2EPM4Cost(num_params=len(
                    self.scenario.cs.get_hyperparameters()),
                                             scenario=self.scenario)
                X, y = rh2epm.transform(rh)
                self.logger.debug(
                    "Training model with data of shape X: %s, y: %s",
                    str(X.shape), str(y.shape))

                types, bounds = get_types(self.scenario.cs,
                                          self.scenario.feature_array)
                epm = RandomForestWithInstances(
                    self.scenario.cs,
                    types=types,
                    bounds=bounds,
                    seed=self.rng.randint(MAXINT),
                    instance_features=self.scenario.feature_array,
                    ratio_features=1.0)
                epm.train(X, y)
            config_array = convert_configurations_to_array(configs)
            mean, var = epm.predict_marginalized_over_instances(config_array)
            var = np.zeros(mean.shape)
            # We don't want to show the uncertainty of the model but uncertainty over multiple optimizer runs
            # This variance is computed in an outer loop.
        else:
            mean, var = [], []
            for entry in traj:
                #self.logger.debug(entry)
                time.append(entry["wallclock_time"])
                configs.append(entry["incumbent"])
                self.logger.debug(
                    rh.get_runs_for_config(configs[-1],
                                           only_max_observed_budget=True))
                costs = [
                    rh.data[RunKey(rh.config_ids[configs[-1]], i, s, b)].cost
                    for i, s, b in rh.get_runs_for_config(
                        configs[-1], only_max_observed_budget=True)
                ]
                # self.logger.debug(len(costs), time[-1])
                if not costs:
                    time.pop()
                else:
                    mean.append(np.mean(costs))
                    var.append(0)  # No variance over instances
            mean, var = np.array(mean).reshape(-1, 1), np.array(var).reshape(
                -1, 1)
        return mean, var, time, configs
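The loop assumes trajectory entries are dict-like with at least the keys 'wallclock_time' and 'incumbent'; a toy sketch of that shape (values made up):

traj = [
    {'wallclock_time': 1.5, 'incumbent': 'config-a'},
    {'wallclock_time': 7.0, 'incumbent': 'config-b'},
]
times = [entry['wallclock_time'] for entry in traj]   # x-values to plot
configs = [entry['incumbent'] for entry in traj]      # configs to evaluate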
Example #10
    def _get_runs(
        self,
        configs: Union[str, typing.List[Configuration]],
        insts: Union[str, typing.List[str]],
        repetitions: int = 1,
        runhistory: RunHistory = None,
    ) -> typing.Tuple[typing.List[_Run], RunHistory]:
        """
        Generate list of SMAC-TAE runs to be executed. This means
        combinations of configs with all instances on a certain number of seeds.

        Runs that don't need to be reevaluated are added to the returned
        runhistory instead of being scheduled again.

        Parameters
        ----------
        configs: str or list<Configuration>
            string or directly a list of Configuration
            str from [def, inc, def+inc, wallclock_time, cpu_time, all]
                time evaluates at cpu- or wallclock-timesteps of:
                [max_time/2^0, max_time/2^1, max_time/2^2, ..., default]
                with max_time being the highest recorded time
        insts: str or list<str>
            what instances to use for validation, either from
            [train, test, train+test] or directly a list of instances
        repetitions: int
            number of seeds per instance/config-pair to be evaluated
        runhistory: RunHistory
            optional, try to reuse this runhistory and save some runs

        Returns
        -------
        runs: list<_Run>
            list with _Runs
            [_Run(config=CONFIG1,inst=INSTANCE1,seed=SEED1,inst_specs=INST_SPECIFICS1),
             _Run(config=CONFIG2,inst=INSTANCE2,seed=SEED2,inst_specs=INST_SPECIFICS2),
             ...]
        new_rh: RunHistory
            runhistory containing the runs reused from the given runhistory
        """
        # Get relevant configurations and instances
        if isinstance(configs, str):
            configs = self._get_configs(configs)
        if isinstance(insts, str):
            instances = self._get_instances(
                insts)  # type: typing.Sequence[typing.Union[str, None]]
        elif insts is not None:
            instances = insts
        else:
            instances = [None]
        # If no instances are given, fix the instances to one "None" instance
        if not instances:
            instances = [None]

        # If algorithm is deterministic, fix repetitions to 1
        if self.scen.deterministic and repetitions != 1:  # type: ignore[attr-defined] # noqa F821
            self.logger.warning(
                "Specified %d repetitions, but fixing to 1, "
                "because algorithm is deterministic.", repetitions)
            repetitions = 1

        # Extract relevant information from given runhistory
        inst_seed_config = self._process_runhistory(configs, instances,
                                                    runhistory)

        # Now create the actual run-list
        runs = []
        # Counter for runs without the need of recalculation
        runs_from_rh = 0
        # If we reuse runs, we want to return them as well
        new_rh = RunHistory()

        for i in sorted(instances):
            for rep in range(repetitions):
                # First, find a seed and add all the data we can take from the
                # given runhistory to "our" validation runhistory.
                configs_evaluated = []  # type: typing.List[Configuration]
                if runhistory and i in inst_seed_config:
                    # Choose seed based on most often evaluated inst-seed-pair
                    seed, configs_evaluated = inst_seed_config[i].pop(0)
                    # Delete inst if all seeds are used
                    if not inst_seed_config[i]:
                        inst_seed_config.pop(i)
                    # Add runs to runhistory
                    for c in configs_evaluated[:]:
                        runkey = RunKey(runhistory.config_ids[c], i, seed)
                        cost, time, status, start, end, additional_info = runhistory.data[
                            runkey]
                        if status in [
                                StatusType.CRASHED, StatusType.ABORT,
                                StatusType.CAPPED
                        ]:
                            # Not properly executed target algorithm runs should be repeated
                            configs_evaluated.remove(c)
                            continue
                        new_rh.add(c,
                                   cost,
                                   time,
                                   status,
                                   instance_id=i,
                                   seed=seed,
                                   starttime=start,
                                   endtime=end,
                                   additional_info=additional_info)
                        runs_from_rh += 1
                else:
                    # If no runhistory or no entries for instance, get new seed
                    seed = self.rng.randint(MAXINT)

                # We now have a seed and add all configs that are not already
                # evaluated on that seed to the runs-list. This way, we
                # guarantee the same inst-seed-pairs for all configs.
                for config in [
                        c for c in configs if c not in configs_evaluated
                ]:
                    # Only use specifics if specific exists, else use string "0"
                    specs = self.scen.instance_specific[
                        i] if i and i in self.scen.instance_specific else "0"
                    runs.append(
                        _Run(config=config,
                             inst=i,
                             seed=seed,
                             inst_specs=specs))

        self.logger.info(
            "Collected %d runs from %d configurations on %d "
            "instances with %d repetitions. Reusing %d runs from "
            "given runhistory.", len(runs), len(configs), len(instances),
            repetitions, runs_from_rh)

        return runs, new_rh
Example #11
    def test_load(self):
        configuration_space = test_helpers.get_branin_config_space()

        other_runhistory = '{"data": [[[2, "branini", 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[1, "branin", 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[3, "branin-hoo", 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[2, null, 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[1, "branini", 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[4, null, 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]]], ' \
                  '"configs": {' \
                  '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, ' \
                  '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, ' \
                  '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, ' \
                  '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'

        other_runhistory_filename = os.path.join(self.tmp_dir,
                                                 'runhistory.json')
        with open(other_runhistory_filename, 'w') as fh:
            fh.write(other_runhistory)

        # load from an empty runhistory
        runhistory = RunHistory(aggregate_func=average_cost)
        runhistory.load_json(other_runhistory_filename, configuration_space)
        self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                         [1, 2, 3, 4])
        self.assertEqual(len(runhistory.data), 6)

        # load from non-empty runhistory, in case of a duplicate the existing
        # result will be kept and the new one silently discarded
        runhistory = RunHistory(aggregate_func=average_cost)
        configuration_space.seed(1)
        config = configuration_space.sample_configuration()
        runhistory.add(config,
                       1,
                       1,
                       StatusType.SUCCESS,
                       seed=1,
                       instance_id='branin')
        id_before = id(runhistory.data[RunKey(1, 'branin', 1)])
        runhistory.update_from_json(other_runhistory_filename,
                                    configuration_space)
        id_after = id(runhistory.data[RunKey(1, 'branin', 1)])
        self.assertEqual(len(runhistory.data), 6)
        self.assertEqual(id_before, id_after)

        # load from non-empty runhistory, in case of a duplicate the existing
        # result will be kept and the new one silently discarded
        runhistory = RunHistory(aggregate_func=average_cost)
        configuration_space.seed(1)
        config = configuration_space.sample_configuration()
        config = configuration_space.sample_configuration()
        # This is the former config_3
        config = configuration_space.sample_configuration()
        runhistory.add(config,
                       1,
                       1,
                       StatusType.SUCCESS,
                       seed=1,
                       instance_id='branin')
        id_before = id(runhistory.data[RunKey(1, 'branin', 1)])
        runhistory.update_from_json(other_runhistory_filename,
                                    configuration_space)
        id_after = id(runhistory.data[RunKey(1, 'branin', 1)])
        self.assertEqual(len(runhistory.data), 7)
        self.assertEqual(id_before, id_after)
        self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                         [1, 2, 3, 4])
        self.assertEqual(
            [runhistory.external[run_key] for run_key in runhistory.data],
            [DataOrigin.INTERNAL] + [DataOrigin.EXTERNAL_SAME_INSTANCES] * 6)
Example #12
File: validate.py  Project: midasc/SMAC3
    def get_runs(self, configs, insts, repetitions=1, runhistory=None):
        """
        Generate list of SMAC-TAE runs to be executed. This means
        combinations of configs with all instances on a certain number of seeds.

        Parameters
        ----------
        configs: list<Configuration>
            configurations to be evaluated
        insts: list<strings>
            instances to be validated
        repetitions: int
            number of seeds per instance/config to be evaluated
        runhistory: RunHistory or None
            if given, try to reuse these results and save some runs

        Returns
        -------
        runs: list<dict<string,string,string,string>>
            list with dicts
            [{"config":CONFIG1,"inst":INSTANCE1,"seed":SEED1,"inst_specs":INST_SPECIFICS1},
             {"config":CONFIG2,"inst":INSTANCE2,"seed":SEED2,"inst_specs":INST_SPECIFICS2}]
        """
        # If no instances are given, fix the instances to one "None" instance
        if len(insts) == 0:
            insts = [None]
        # If algorithm is deterministic, fix repetitions to 1
        if self.scen.deterministic:
            self.logger.debug("Fixing repetitions to one, because algorithm is"
                              " deterministic.")
            repetitions = 1

        # Extract relevant information from given runhistory
        inst_seed_config = self._process_runhistory(configs, insts, runhistory)

        # Now create the actual run-list
        runs = []
        # Counter for runs without the need of recalculation
        runs_from_rh = 0

        for i in sorted(insts):
            for rep in range(repetitions):
                configs_evaluated = []
                if runhistory and i in inst_seed_config:
                    # Choose seed based on most often evaluated inst-seed-pair
                    seed, configs_evaluated = inst_seed_config[i].pop(0)
                    # Delete i from dict if list is empty
                    if len(inst_seed_config[i]) == 0:
                        inst_seed_config.pop(i)
                    # Add runs to runhistory
                    for c in configs_evaluated:
                        runkey = RunKey(runhistory.config_ids[c], i, seed)
                        cost, time, status, additional_info = runhistory.data[
                            runkey]
                        self.rh.add(c,
                                    cost,
                                    time,
                                    status,
                                    instance_id=i,
                                    seed=seed,
                                    additional_info=additional_info)
                        runs_from_rh += 1
                else:
                    # If no runhistory or no entries for instance, get new seed
                    seed = self.rng.randint(MAXINT)
                    if self.scen.deterministic:
                        seed = 0
                # configs in inner loop -> same inst-seed-pairs for all configs
                for config in [
                        c for c in configs if c not in configs_evaluated
                ]:
                    specs = self.scen.instance_specific[
                        i] if i and i in self.scen.instance_specific else "0"
                    runs.append({
                        'config': config,
                        'inst': i,
                        'seed': seed,
                        'inst_specs': specs
                    })

        self.logger.info(
            "Collected %d runs from %d configurations on %d instances "
            "with %d repetitions.", len(runs), len(configs), len(insts),
            repetitions)
        self.logger.info("Using %d runs from given runhistory.", runs_from_rh)

        return runs
Example #13
    def _race_challenger(self, challenger: Configuration,
                         incumbent: Configuration, run_history: RunHistory,
                         aggregate_func: typing.Callable):
        '''
            Aggressively race challenger against incumbent.

            Parameters
            ----------
            challenger : Configuration
                configuration which challenges incumbent
            incumbent : Configuration
                best configuration so far
            run_history : RunHistory
                stores all runs we ran so far
            aggregate_func: typing.Callable
                aggregate performance across instances

            Returns
            -------
            new_incumbent: Configuration
                either challenger or incumbent
        '''
        # at least one run of challenger
        # to increase chall_indx counter
        first_run = False
        inc_perf = run_history.get_cost(incumbent)

        learning_curve = []

        self._num_run += 1
        self._chall_indx += 1

        pc = None
        for epoch in range(self.max_epochs):
            status, cost, time, add_info = self.tae_runner.start(
                config=challenger,
                instance=None,
                seed=0,
                cutoff=2**32 - 1,
                instance_specific=None,
                pc=pc)
            try:
                pc = add_info["model"]
            except KeyError:  # model building failed, e.g. because of nan
                break

            learning_curve.append(cost)

            if len(self.learning_curves) > 10 and epoch > self.max_epochs / 4:
                seen_curves = np.array(self.learning_curves)[:, epoch]
                if cost > np.median(seen_curves):
                    self.logger.info("Abort run (%f vs %f)" %
                                     (cost, np.median(seen_curves)))
                    break

        # delete model in runhistory to be more memory efficient
        chall_id = run_history.config_ids[challenger]
        runkey = RunKey(chall_id, None, 0)
        runvalue = run_history.data[runkey]
        try:
            del runvalue.additional_info["model"]
        except KeyError:
            pass

        if epoch == self.max_epochs - 1:
            self.learning_curves.append(learning_curve)

        chal_perf = cost

        if cost < inc_perf:
            self.logger.info(
                "Challenger (%.4f) is better than incumbent (%.4f)" %
                (chal_perf, inc_perf))
            # Show changes in the configuration
            params = sorted([(param, incumbent[param], challenger[param])
                             for param in challenger.keys()])
            self.logger.info("Changes in incumbent:")
            for param in params:
                if param[1] != param[2]:
                    self.logger.info("  %s : %r -> %r" % (param))
                else:
                    self.logger.debug("  %s remains unchanged: %r" %
                                      (param[0], param[1]))
            incumbent = challenger
            self.stats.inc_changed += 1
            self.traj_logger.add_entry(train_perf=chal_perf,
                                       incumbent_id=self.stats.inc_changed,
                                       incumbent=challenger)
        else:
            self.logger.debug(
                "Incumbent (%.4f) is better than challenger (%.4f)" %
                (inc_perf, chal_perf))

        return incumbent
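The early-abort test in the loop compares the challenger's current cost against the median of previously recorded curves at the same epoch; a numeric sketch with made-up curves:

import numpy as np

learning_curves = [[0.9, 0.5], [0.8, 0.6], [0.95, 0.7]]  # past runs, per epoch
epoch, cost = 1, 0.75
seen_curves = np.array(learning_curves)[:, epoch]  # costs at this epoch so far
print(cost > np.median(seen_curves))               # True -> abort this run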
Example #14
def fmin_smac(func: typing.Callable,
              x0: typing.List[float],
              bounds: typing.List[typing.Iterable[float]],
              maxfun: int = -1,
              rng: typing.Union[np.random.RandomState, int] = None,
              scenario_args: typing.Mapping[str, typing.Any] = None,
              **kwargs):
    """
    Minimize a function func using the SMAC4HPO facade
    (i.e., a modified version of SMAC).
    This function is a convenience wrapper for the SMAC4HPO class.

    Parameters
    ----------
    func : typing.Callable
        Function to minimize.
    x0 : typing.List[float]
        Initial guess/default configuration.
    bounds : typing.List[typing.Iterable[float]]
        ``(min, max)`` pairs for each element in ``x``, defining the bounds
        on that parameter.
    maxfun : int, optional
        Maximum number of function evaluations.
    rng : np.random.RandomState, optional
        Random number generator used by SMAC.
    scenario_args: typing.Mapping[str,typing.Any]
        Arguments passed to the scenario
        See smac.scenario.scenario.Scenario
    **kwargs:
        Arguments passed to the optimizer class
        See ~smac.facade.smac_facade.SMAC

    Returns
    -------
    x : list
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    s : :class:`smac.facade.smac_hpo_facade.SMAC4HPO`
        SMAC objects which enables the user to get
        e.g., the trajectory and runhistory.

    """
    # create configuration space
    cs = ConfigurationSpace()

    # Adjust zero padding
    tmplt = 'x{0:0' + str(len(str(len(bounds)))) + 'd}'

    for idx, (lower_bound, upper_bound) in enumerate(bounds):
        parameter = UniformFloatHyperparameter(name=tmplt.format(idx + 1),
                                               lower=lower_bound,
                                               upper=upper_bound,
                                               default_value=x0[idx])
        cs.add_hyperparameter(parameter)

    # create scenario
    scenario_dict = {
        "run_obj": "quality",
        "cs": cs,
        "deterministic": "true",
        "initial_incumbent": "DEFAULT",
    }

    if scenario_args is not None:
        scenario_dict.update(scenario_args)

    if maxfun > 0:
        scenario_dict["runcount_limit"] = maxfun
    scenario = Scenario(scenario_dict)

    smac = SMAC4HPO(scenario=scenario,
                    tae_runner=ExecuteTAFuncArray,
                    tae_runner_kwargs={'ta': func},
                    rng=rng,
                    **kwargs)

    smac.logger = logging.getLogger(smac.__module__ + "." +
                                    smac.__class__.__name__)
    incumbent = smac.optimize()
    config_id = smac.solver.runhistory.config_ids[incumbent]
    run_key = RunKey(config_id, None, 0)
    incumbent_performance = smac.solver.runhistory.data[run_key]
    incumbent = np.array(
        [incumbent[tmplt.format(idx + 1)] for idx in range(len(bounds))],
        dtype=float)
    return incumbent, incumbent_performance.cost, smac
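A hedged usage sketch, assuming a SMAC3 installation that ships this fmin_smac; the quadratic objective is made up for illustration:

import numpy as np

def quadratic(x):
    # x arrives as an array-like, ordered x1, x2, ...
    return (x[0] - 1.0) ** 2 + (x[1] + 0.5) ** 2

x, f, smac = fmin_smac(func=quadratic,
                       x0=[0.0, 0.0],
                       bounds=[(-5.0, 5.0), (-5.0, 5.0)],
                       maxfun=20,
                       rng=3)
print(x, f)  # approximate minimizer and its objective value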
Example #15
def fmin_smac(func: callable,
              x0: list,
              bounds: list,
              maxfun: int = -1,
              maxtime: int = -1,
              rng: np.random.RandomState = None):
    """ Minimize a function func using the SMAC algorithm.
    This function is a convenience wrapper for the SMAC class.

    Parameters
    ----------
    func : callable f(x)
        Function to minimize.
    x0 : list
        Initial guess/default configuration.
    bounds : list
        ``(min, max)`` pairs for each element in ``x``, defining the bounds
        on that parameter.
    maxtime : int, optional
        Maximum runtime in seconds.
    maxfun : int, optional
        Maximum number of function evaluations.
    rng : np.random.RandomState, optional
        Random number generator used by SMAC.

    Returns
    -------
    x : list
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    s : :class:`smac.facade.smac_facade.SMAC`
        SMAC objects which enables the user to get
        e.g., the trajectory and runhistory.
    """
    # create configuration space
    cs = ConfigurationSpace()
    for idx, (lower_bound, upper_bound) in enumerate(bounds):
        parameter = UniformFloatHyperparameter(name="x%d" % (idx + 1),
                                               lower=lower_bound,
                                               upper=upper_bound,
                                               default_value=x0[idx])
        cs.add_hyperparameter(parameter)

    # Create target algorithm runner
    ta = ExecuteTAFuncArray(ta=func)

    # create scenario
    scenario_dict = {
        "run_obj": "quality",
        "cs": cs,
        "deterministic": "true",
        "initial_incumbent": "DEFAULT"
    }
    if maxfun > 0:
        scenario_dict["runcount_limit"] = maxfun
    if maxtime > 0:
        scenario_dict["wallclock_limit"] = maxtime
    scenario = Scenario(scenario_dict)

    smac = SMAC(scenario=scenario, tae_runner=ta, rng=rng)
    smac.logger = logging.getLogger(smac.__module__ + "." +
                                    smac.__class__.__name__)
    incumbent = smac.optimize()

    config_id = smac.solver.runhistory.config_ids[incumbent]
    run_key = RunKey(config_id, None, 0)
    incumbent_performance = smac.solver.runhistory.data[run_key]
    incumbent = np.array(
        [incumbent['x%d' % (idx + 1)] for idx in range(len(bounds))],
        dtype=float)
    return incumbent, incumbent_performance.cost, smac
Example #16
    def __init__(self,
                 scenario: Scenario,
                 smac: Union[SMAC, None] = None,
                 mode: str = 'all',
                 X: Union[None, List[list], np.ndarray] = None,
                 y: Union[None, List[list], np.ndarray] = None,
                 numParams: int = -1,
                 impute: bool = False,
                 seed: int = 12345,
                 run: bool = False,
                 max_sample_size: int = -1,
                 fanova_cut_at_default: bool = False,
                 fANOVA_pairwise: bool = True,
                 forwardsel_feat_imp: bool = False,
                 incn_quant_var: bool = True,
                 marginalize_away_instances: bool = False,
                 save_folder: str = 'PIMP'):
        """
        Interface to be used with SMAC or with X and y matrices.
        :param scenario: The scenario object, that knows the configuration space.
        :param smac: The smac object that keeps all the run-data
        :param mode: The mode with which to run PIMP [ablation, fanova, all, forward-selection]
        :param X: Numpy Array that contains parameter arrays
        :param y: Numpy array that contains the corresponding performance values
        :param numParams: The number of parameters to evaluate
        :param impute: Flag to decide if censored data gets imputed or not
        :param seed: The random seed
        :param run: Flag to immediately compute the importance values after this setup or not.
        """
        self.scenario = scenario
        self.imp = None
        self.mode = mode
        self.save_folder = save_folder
        if not os.path.exists(self.save_folder):
            os.mkdir(self.save_folder)
        if smac is not None:
            self.imp = Importance(scenario=scenario,
                                  runhistory=smac.runhistory,
                                  incumbent=smac.solver.incumbent,
                                  seed=seed,
                                  parameters_to_evaluate=numParams,
                                  save_folder='PIMP',
                                  impute_censored=impute,
                                  max_sample_size=max_sample_size,
                                  fANOVA_cut_at_default=fanova_cut_at_default,
                                  fANOVA_pairwise=fANOVA_pairwise,
                                  forwardsel_feat_imp=forwardsel_feat_imp,
                                  incn_quant_var=incn_quant_var,
                                  preprocess=marginalize_away_instances)
        elif X is not None and y is not None:
            X = np.array(X)
            y = np.array(y)
            runHist = RunHistory(average_cost)
            if X.shape[0] != y.shape[0]:
                raise Exception("Number of samples in X and y don't match!")
            n_params = len(scenario.cs.get_hyperparameters())
            feats = None
            if X.shape[1] > n_params:
                feats = X[:, n_params:]
                assert feats.shape[1] == scenario.feature_array.shape[1]
                X = X[:, :n_params]

            for p in range(X.shape[1]):  # Normalize the data to fit into [0, 1]
                _min, _max = np.min(X[:, p]), np.max(X[:, p])
                if _min < 0. or 1 < _max:  # if it is not already normalized
                    for id, v in enumerate(X[:, p]):
                        X[id, p] = (v - _min) / (_max - _min)

            # Add everything to a runhistory such that PIMP can work with it
            for x, feat, y_val in zip(X, feats if feats is not None else X, y):
                id = None
                for inst in scenario.feature_dict:  # determine on which instance a configuration was run
                    if np.all(scenario.feature_dict[inst] == feat):
                        id = inst
                        break
                runHist.add(Configuration(scenario.cs, vector=x), y_val, 0, StatusType.SUCCESS, id)
            self.X = X
            self.y = y

            best_ = None  # Determine incumbent according to the best mean cost in the runhistory
            for config in runHist.config_ids:
                inst_seed_pairs = runHist.get_runs_for_config(config)
                all_ = []
                for inst, seed in inst_seed_pairs:
                    rk = RunKey(runHist.config_ids[config], inst, seed)
                    all_.append(runHist.data[rk].cost)
                mean = np.mean(all_)
                if best_ is None or best_[0] > mean:
                    best_ = (mean, config)
            incumbent = best_[1]
            self.imp = Importance(scenario=scenario,
                                  runhistory=runHist,
                                  seed=seed,
                                  parameters_to_evaluate=numParams,
                                  save_folder=self.save_folder,
                                  impute_censored=impute,
                                  incumbent=incumbent,
                                  fANOVA_cut_at_default=fanova_cut_at_default,
                                  fANOVA_pairwise=fANOVA_pairwise,
                                  forwardsel_feat_imp=forwardsel_feat_imp,
                                  incn_quant_var=incn_quant_var,
                                  preprocess=marginalize_away_instances
                                  )
        else:
            raise Exception('Neither X and y matrices nor a SMAC object were specified to compute the importance '
                            'values from!')

        if run:
            self.compute_importances()
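The per-column normalization loop above is plain min-max scaling; an equivalent vectorized sketch (note: the original skips columns already inside [0, 1] and assumes max > min per column, as does this toy version):

import numpy as np

X = np.array([[2.0, 10.0],
              [4.0, 30.0],
              [3.0, 20.0]])
X_norm = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
print(X_norm)  # every column now spans [0, 1]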
Example #17
    def test_load(self):
        configuration_space = test_helpers.get_branin_config_space()

        other_runhistory = '{"data": [[[2, "branini", 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[1, "branin", 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[3, "branin-hoo", 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[2, null, 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[1, "branini", 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[4, null, 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]]], ' \
                  '"configs": {' \
                  '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, ' \
                  '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, ' \
                  '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, ' \
                  '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'

        other_runhistory_filename = os.path.join(self.tmp_dir,
                                                 '.runhistory_20.json')
        with open(other_runhistory_filename, 'w') as fh:
            fh.write(other_runhistory)

        # load from an empty runhistory
        runhistory = RunHistory(aggregate_func=average_cost)
        runhistory.load_json(other_runhistory_filename, configuration_space)
        self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                         [1, 2, 3, 4])
        self.assertEqual(len(runhistory.data), 6)

        # load from non-empty runhistory; the existing run will be overridden
        # because it already existed
        runhistory = RunHistory(aggregate_func=average_cost)
        configuration_space.seed(1)
        config = configuration_space.sample_configuration()
        runhistory.add(config, 1, 1, StatusType.SUCCESS, seed=1,
                        instance_id='branin')
        id_before = id(runhistory.data[RunKey(1, 'branin', 1)])
        runhistory.update_from_json(other_runhistory_filename,
                                    configuration_space)
        id_after = id(runhistory.data[RunKey(1, 'branin', 1)])
        self.assertEqual(len(runhistory.data), 6)
        self.assertNotEqual(id_before, id_after)

        # load from non-empty runhistory, but existing run will not be
        # overridden, but config_id will be re-used
        runhistory = RunHistory(aggregate_func=average_cost)
        configuration_space.seed(1)
        config = configuration_space.sample_configuration()
        config = configuration_space.sample_configuration()
        # This is the former config_3
        config = configuration_space.sample_configuration()
        runhistory.add(config, 1, 1, StatusType.SUCCESS, seed=1,
                       instance_id='branin')
        id_before = id(runhistory.data[RunKey(1, 'branin', 1)])
        runhistory.update_from_json(other_runhistory_filename,
                                    configuration_space)
        id_after = id(runhistory.data[RunKey(1, 'branin', 1)])
        self.assertEqual(len(runhistory.data), 7)
        self.assertEqual(id_before, id_after)
        print(runhistory.config_ids)
        self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                         [1, 2, 3, 4])
        print(list(runhistory.data.keys()))