Example #1
    def _get_mean_var_time(self, validator, traj, use_epm, rh):
        """
        Parameters
        ----------
        validator: Validator
            validator (smac-based)
        traj: List[Configuraton]
            trajectory to set in validator
        use_epm: bool
            validated or not (no need to use epm if validated)
        rh: RunHistory
            ??

        Returns
        -------
        mean, var

        times: List[float]
            times to plot (x-values)
        configs

        """
        # TODO kinda important: docstrings, what is this function doing?
        if validator:
            validator.traj = traj  # set trajectory
        time, configs = [], []

        if use_epm and not self.block_epm:
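            # Costs along the trajectory are not validated, so estimate them with an EPM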
            for entry in traj:
                time.append(entry["wallclock_time"])
                configs.append(entry["incumbent"])
                # self.logger.debug('Time: %d Runs: %d', time[-1], len(rh.get_runs_for_config(configs[-1])))

            self.logger.debug(
                "Using %d samples (%d distinct) from trajectory.", len(time),
                len(set(configs)))

            # Initialize EPM
            if validator.epm:  # cost, not log-cost: the validator's EPM is trained on raw costs
                epm = validator.epm
            else:
                self.logger.debug(
                    "No EPM passed! Training new one from runhistory.")
                # Train random forest and transform training data (from given rh)
                # Not using validator because we want to plot uncertainties
                rh2epm = RunHistory2EPM4Cost(
                    num_params=len(self.scenario.cs.get_hyperparameters()),
                    scenario=self.scenario)
                X, y = rh2epm.transform(rh)
                self.logger.debug(
                    "Training model with data of shape X: %s, y: %s",
                    str(X.shape), str(y.shape))

                types, bounds = get_types(self.scenario.cs,
                                          self.scenario.feature_array)
                epm = RandomForestWithInstances(
                    self.scenario.cs,
                    types=types,
                    bounds=bounds,
                    seed=self.rng.randint(MAXINT),
                    instance_features=self.scenario.feature_array,
                    ratio_features=1.0)
                epm.train(X, y)
            config_array = convert_configurations_to_array(configs)
            mean, var = epm.predict_marginalized_over_instances(config_array)
            # We don't want to show the uncertainty of the model, but the
            # uncertainty over multiple optimizer runs; that variance is
            # computed in an outer loop, so zero it out here.
            var = np.zeros(mean.shape)
        else:
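            # Costs are validated (or the EPM is blocked): average the observed costs from the runhistory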
            mean, var = [], []
            for entry in traj:
                #self.logger.debug(entry)
                time.append(entry["wallclock_time"])
                configs.append(entry["incumbent"])
                costs = _cost(configs[-1], rh,
                              rh.get_runs_for_config(configs[-1]))
                # self.logger.debug(len(costs), time[-1])
                if not costs:
                    time.pop()
                else:
                    mean.append(np.mean(costs))
                    var.append(0)  # No variance over instances
            mean = np.array(mean).reshape(-1, 1)
            var = np.array(var).reshape(-1, 1)
        return mean, var, time, configs
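
A minimal usage sketch for the method above (hypothetical setup: `plotter` is an instance of the surrounding class, and `validator`, `traj` and `rh` come from a SMAC analysis run; the trajectory entries are dicts with "wallclock_time" and "incumbent" keys, as the method expects):

mean, var, times, configs = plotter._get_mean_var_time(
    validator=validator,  # smac-based Validator, may carry a trained EPM
    traj=traj,            # trajectory entries: {"wallclock_time": ..., "incumbent": ...}
    use_epm=True,         # costs are not validated, so predict them with an EPM
    rh=rh)                # runhistory used to train an EPM if the validator has none
# `times` holds the x-values and `mean` the y-values for a cost-over-time plot.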
Example #2

def_dict = def_.get_dictionary()
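# def_ is presumably the scenario's default configuration (a ConfigSpace
# Configuration); its dictionary is compared against each config below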

# Switch it around such that statistics about the default are gathered first
if configs[0] != def_:
    configs[0], configs[1] = configs[1], configs[0]
logging.info('Found %d configs', len(configs))
logging.info('Cost per config:')

# For each config
for config in configs:
    # gather statistics such as
    config_dict = config.get_dictionary()
    costs = np.array(_cost(config, runhist))  # the cost for running on each instance
    runtime = np.mean(np.array(_runtime(config, runhist)))  # the mean runtime
    cost = np.mean(costs)  # the mean cost
    timeouts = np.sum(costs > scenario.cutoff)  # and count the number of timeouts
    default = config == def_

    # This is just cosmetics for the output
    config_str = repr(config).split('\n')  # get the string representation
    num_changed = 0
    for idx, line in enumerate(config_str):
        try:
            name, value = [x.strip() for x in line.split(', ')]
            if name in def_dict:
                changed = config_dict[name] != def_dict[name]
            else: