Python MetaBase.get_configuration_from_algorithm_index Beispiele

Programmiersprache: Python

Namespace / Paketname: autosklearn.metalearning.metalearning.meta_base

Klasse / Typ: MetaBase

Methode / Funktion: get_configuration_from_algorithm_index

Beispiele auf hotexamples.com: 4

Python MetaBase.get_configuration_from_algorithm_index - 4 Beispiele gefunden. Dies sind die am besten bewerteten Python Beispiele für die autosklearn.metalearning.metalearning.meta_base.MetaBase.get_configuration_from_algorithm_index, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

MetaBase(9)

get_metafeatures(7)

get_all_runs(5)

add_dataset(4)

get_runs(4)

remove_dataset(3)

get_all_metafeatures(2)

get_configuration_from_algorithm_index(2)

get_algorithm_index_from_configuration(1)

get_all_dataset_names(1)

get_all_train_metafeatures_as_pandas(1)

Beispiel #1

Datei anzeigen

Datei: smbo.py Projekt: wyslatitude/auto-sklearn

    def run_smbo(self, max_iters=1000):
        global evaluator

        # == first things first: load the datamanager
        self.reset_data_manager()

        # == Initialize SMBO stuff
        # first create a scenario
        seed = self.seed  # TODO
        num_params = len(self.config_space.get_hyperparameters())
        # allocate a run history
        run_history = RunHistory()
        meta_runhistory = RunHistory()
        meta_runs_dataset_indices = {}
        num_run = self.start_num_run
        instance_id = self.dataset_name + SENTINEL

        # == Train on subset
        #    before doing anything, let us run the default_cfg
        #    on a subset of the available data to ensure that
        #    we at least have some models
        #    we will try three different ratios of decreasing magnitude
        #    in the hope that at least on the last one we will be able
        #    to get a model
        n_data = self.datamanager.data['X_train'].shape[0]
        subset_ratio = 10000. / n_data
        if subset_ratio >= 0.5:
            subset_ratio = 0.33
            subset_ratios = [subset_ratio, subset_ratio * 0.10]
        else:
            subset_ratios = [subset_ratio, 500. / n_data]
        self.logger.info("Training default configurations on a subset of "
                         "%d/%d data points." %
                         (int(n_data * subset_ratio), n_data))

        # the time limit for these function evaluations is rigorously
        # set to only 1/2 of a full function evaluation
        subset_time_limit = max(5, int(self.func_eval_time_limit / 2))
        # the configs we want to run on the data subset are:
        # 1) the default configs
        # 2) a set of configs we selected for training on a subset
        subset_configs = [self.config_space.get_default_configuration()] \
                          + self.collect_additional_subset_defaults()
        subset_config_succesful = [False] * len(subset_configs)
        for subset_config_id, next_config in enumerate(subset_configs):
            for i, ratio in enumerate(subset_ratios):
                self.reset_data_manager()
                n_data_subsample = int(n_data * ratio)

                # run the config, but throw away the result afterwards
                # since this cfg was evaluated only on a subset
                # and we don't want  to confuse SMAC
                self.logger.info(
                    "Starting to evaluate %d on SUBSET "
                    "with size %d and time limit %ds.", num_run,
                    n_data_subsample, subset_time_limit)
                self.logger.info(next_config)
                _info = eval_with_limits(self.datamanager, self.tmp_dir,
                                         next_config, seed, num_run,
                                         self.resampling_strategy,
                                         self.resampling_strategy_args,
                                         self.memory_limit, subset_time_limit,
                                         n_data_subsample)
                (duration, result, _, additional_run_info, status) = _info
                self.logger.info(
                    "Finished evaluating %d. configuration on SUBSET. "
                    "Duration %f; loss %f; status %s; additional run "
                    "info: %s ", num_run, duration, result, str(status),
                    additional_run_info)

                num_run += 1
                if i < len(subset_ratios) - 1:
                    if status != StatusType.SUCCESS:
                        # Do not increase num_run here, because we will try
                        # the same configuration with less data
                        self.logger.info(
                            "A CONFIG did not finish "
                            " for subset ratio %f -> going smaller", ratio)
                        continue
                    else:
                        self.logger.info(
                            "Finished SUBSET training sucessfully "
                            "with ratio %f", ratio)
                        subset_config_succesful[subset_config_id] = True
                        break
                else:
                    if status != StatusType.SUCCESS:
                        self.logger.info(
                            "A CONFIG did not finish "
                            " for subset ratio %f.", ratio)
                        continue
                    else:
                        self.logger.info(
                            "Finished SUBSET training sucessfully "
                            "with ratio %f", ratio)
                        subset_config_succesful[subset_config_id] = True
                        break

        # Use the first non-failing configuration from the subsets as the new
        #  default configuration -> this guards us against the random forest
        # failing on large, sparse datasets
        default_cfg = None
        for subset_config_id, next_config in enumerate(subset_configs):
            if subset_config_succesful[subset_config_id]:
                default_cfg = next_config
                break
        if default_cfg is None:
            default_cfg = self.config_space.get_default_configuration()

        # == METALEARNING suggestions
        # we start by evaluating the defaults on the full dataset again
        # and add the suggestions from metalearning behind it

        if self.metadata_directory is None:
            metalearning_directory = os.path.dirname(
                autosklearn.metalearning.__file__)
            # There is no multilabel data in OpenML
            if self.task == MULTILABEL_CLASSIFICATION:
                meta_task = BINARY_CLASSIFICATION
            else:
                meta_task = self.task
            metadata_directory = os.path.join(
                metalearning_directory, 'files', '%s_%s_%s' %
                (METRIC_TO_STRING[self.metric],
                 TASK_TYPES_TO_STRING[meta_task],
                 'sparse' if self.datamanager.info['is_sparse'] else 'dense'))
            self.metadata_directory = metadata_directory

        self.logger.info('Metadata directory: %s', self.metadata_directory)
        meta_base = MetaBase(self.config_space, self.metadata_directory)

        metafeature_calculation_time_limit = int(self.total_walltime_limit / 4)
        metafeature_calculation_start_time = time.time()
        meta_features = self._calculate_metafeatures_with_limits(
            metafeature_calculation_time_limit)
        metafeature_calculation_end_time = time.time()
        metafeature_calculation_time_limit = \
            metafeature_calculation_time_limit - (
            metafeature_calculation_end_time -
            metafeature_calculation_start_time)

        if metafeature_calculation_time_limit < 1:
            self.logger.warning(
                'Time limit for metafeature calculation less '
                'than 1 seconds (%f). Skipping calculation '
                'of metafeatures for encoded dataset.',
                metafeature_calculation_time_limit)
            meta_features_encoded = None
        else:
            self.datamanager.perform1HotEncoding()
            meta_features_encoded = \
                self._calculate_metafeatures_encoded_with_limits(
                    metafeature_calculation_time_limit)

        # In case there is a problem calculating the encoded meta-features
        if meta_features is None:
            if meta_features_encoded is not None:
                meta_features = meta_features_encoded
        else:
            if meta_features_encoded is not None:
                meta_features.metafeature_values.update(
                    meta_features_encoded.metafeature_values)

        if meta_features is not None:
            meta_base.add_dataset(instance_id, meta_features)
            # Do mean imputation of the meta-features - should be done specific
            # for each prediction model!
            all_metafeatures = meta_base.get_metafeatures(
                features=list(meta_features.keys()))
            all_metafeatures.fillna(all_metafeatures.mean(), inplace=True)

            metalearning_configurations = self.collect_metalearning_suggestions(
                meta_base)
            if metalearning_configurations is None:
                metalearning_configurations = []
            self.reset_data_manager()

            self.logger.info('%s', meta_features)

            # Convert meta-features into a dictionary because the scenario
            # expects a dictionary
            meta_features_dict = {}
            for dataset, series in all_metafeatures.iterrows():
                meta_features_dict[dataset] = series.values
            meta_features_list = []
            for meta_feature_name in all_metafeatures.columns:
                meta_features_list.append(
                    meta_features[meta_feature_name].value)
            meta_features_list = np.array(meta_features_list).reshape((1, -1))
            self.logger.info(list(meta_features_dict.keys()))

            meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
            meta_runs_index = 0
            try:
                meta_durations = meta_base.get_all_runs('runtime')
                read_runtime_data = True
            except KeyError:
                read_runtime_data = False
                self.logger.critical('Cannot read runtime data.')
                if self.acquisition_function == 'EIPS':
                    self.logger.critical(
                        'Reverting to acquisition function EI!')
                    self.acquisition_function = 'EI'

            for meta_dataset in meta_runs.index:
                meta_dataset_start_index = meta_runs_index
                for meta_configuration in meta_runs.columns:
                    if np.isfinite(meta_runs.loc[meta_dataset,
                                                 meta_configuration]):
                        try:
                            config = meta_base.get_configuration_from_algorithm_index(
                                meta_configuration)
                            cost = meta_runs.loc[meta_dataset,
                                                 meta_configuration]
                            if read_runtime_data:
                                runtime = meta_durations.loc[
                                    meta_dataset, meta_configuration]
                            else:
                                runtime = 1
                            # TODO read out other status types!
                            meta_runhistory.add(config,
                                                cost,
                                                runtime,
                                                StatusType.SUCCESS,
                                                instance_id=meta_dataset)
                            meta_runs_index += 1
                        except:
                            # TODO maybe add warning
                            pass

                meta_runs_dataset_indices[meta_dataset] = (
                    meta_dataset_start_index, meta_runs_index)
        else:
            if self.acquisition_function == 'EIPS':
                self.logger.critical('Reverting to acquisition function EI!')
                self.acquisition_function = 'EI'
            meta_features_list = []
            meta_features_dict = {}
            metalearning_configurations = []

        self.scenario = AutoMLScenario(self.config_space,
                                       self.total_walltime_limit,
                                       self.func_eval_time_limit,
                                       meta_features_dict, self.tmp_dir,
                                       self.shared_mode)

        types = get_types(self.config_space, self.scenario.feature_array)
        if self.acquisition_function == 'EI':
            rh2EPM = RunHistory2EPM4Cost(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=None,
                                         impute_censored_data=False,
                                         impute_state=None)
            model = RandomForestWithInstances(
                types,
                instance_features=meta_features_list,
                seed=1,
                num_trees=10)
            smac = SMBO(self.scenario, model=model, rng=seed)
        elif self.acquisition_function == 'EIPS':
            rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=None,
                                         impute_censored_data=False,
                                         impute_state=None)
            model = UncorrelatedMultiObjectiveRandomForestWithInstances(
                ['cost', 'runtime'],
                types,
                num_trees=10,
                instance_features=meta_features_list,
                seed=1)
            acquisition_function = EIPS(model)
            smac = SMBO(self.scenario,
                        acquisition_function=acquisition_function,
                        model=model,
                        runhistory2epm=rh2EPM,
                        rng=seed)
        else:
            raise ValueError('Unknown acquisition function value %s!' %
                             self.acquisition_function)

        # Build a runtime model
        # runtime_rf = RandomForestWithInstances(types,
        #                                        instance_features=meta_features_list,
        #                                        seed=1, num_trees=10)
        # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
        #                                      scenario=self.scenario,
        #                                      success_states=None,
        #                                      impute_censored_data=False,
        #                                      impute_state=None)
        # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
        # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
        X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
        # Transform Y_meta on a per-dataset base
        for meta_dataset in meta_runs_dataset_indices:
            start_index, end_index = meta_runs_dataset_indices[meta_dataset]
            end_index += 1  # Python indexing
            Y_meta[start_index:end_index, 0]\
                [Y_meta[start_index:end_index, 0] >2.0] =  2.0
            dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
            Y_meta[start_index:end_index,
                   0] = 1 - ((1. - Y_meta[start_index:end_index, 0]) /
                             (1. - dataset_minimum))
            Y_meta[start_index:end_index, 0]\
                  [Y_meta[start_index:end_index, 0] > 2] = 2

        # == first, evaluate all metelearning and default configurations
        for i, next_config in enumerate(
            ([default_cfg] + metalearning_configurations)):
            # Do not evaluate default configurations more than once
            if i >= len([default_cfg]) and next_config in [default_cfg]:
                continue

            config_name = 'meta-learning' if i >= len([default_cfg]) \
                else 'default'

            self.logger.info(
                "Starting to evaluate %d. configuration "
                "(%s configuration) with time limit %ds.", num_run,
                config_name, self.func_eval_time_limit)
            self.logger.info(next_config)
            self.reset_data_manager()
            info = eval_with_limits(self.datamanager, self.tmp_dir,
                                    next_config, seed, num_run,
                                    self.resampling_strategy,
                                    self.resampling_strategy_args,
                                    self.memory_limit,
                                    self.func_eval_time_limit)
            (duration, result, _, additional_run_info, status) = info
            run_history.add(config=next_config,
                            cost=result,
                            time=duration,
                            status=status,
                            instance_id=instance_id,
                            seed=seed)
            run_history.update_cost(next_config, result)
            self.logger.info(
                "Finished evaluating %d. configuration. "
                "Duration %f; loss %f; status %s; additional run "
                "info: %s ", num_run, duration, result, str(status),
                additional_run_info)
            num_run += 1
            if smac.incumbent is None:
                smac.incumbent = next_config
            elif result < run_history.get_cost(smac.incumbent):
                smac.incumbent = next_config

            if self.scenario.shared_model:
                pSMAC.write(run_history=run_history,
                            output_directory=self.scenario.output_dir,
                            num_run=self.seed)

        # == after metalearning run SMAC loop
        smac.runhistory = run_history
        smac_iter = 0
        finished = False
        while not finished:
            if self.scenario.shared_model:
                pSMAC.read(run_history=run_history,
                           output_directory=self.scenario.output_dir,
                           configuration_space=self.config_space,
                           logger=self.logger)

            next_configs = []
            time_for_choose_next = -1
            try:
                X_cfg, Y_cfg = rh2EPM.transform(run_history)

                if not run_history.empty():
                    # Update costs by normalization
                    dataset_minimum = np.min(Y_cfg[:, 0])
                    Y_cfg[:, 0] = 1 - ((1. - Y_cfg[:, 0]) /
                                       (1. - dataset_minimum))
                    Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2

                if len(X_meta) > 0 and len(X_cfg) > 0:
                    pass
                    #X_cfg = np.concatenate((X_meta, X_cfg))
                    #Y_cfg = np.concatenate((Y_meta, Y_cfg))
                elif len(X_meta) > 0:
                    X_cfg = X_meta.copy()
                    Y_cfg = Y_meta.copy()
                elif len(X_cfg) > 0:
                    X_cfg = X_cfg.copy()
                    Y_cfg = Y_cfg.copy()
                else:
                    raise ValueError(
                        'No training data for SMAC random forest!')

                self.logger.info('Using %d training points for SMAC.' %
                                 X_cfg.shape[0])
                choose_next_start_time = time.time()
                next_configs_tmp = smac.choose_next(
                    X_cfg,
                    Y_cfg,
                    num_interleaved_random=110,
                    num_configurations_by_local_search=10,
                    num_configurations_by_random_search_sorted=100)
                time_for_choose_next = time.time() - choose_next_start_time
                self.logger.info('Used %g seconds to find next '
                                 'configurations' % (time_for_choose_next))
                next_configs.extend(next_configs_tmp)
            # TODO put Exception here!
            except Exception as e:
                self.logger.error(e)
                self.logger.error("Error in getting next configurations "
                                  "with SMAC. Using random configuration!")
                next_config = self.config_space.sample_configuration()
                next_configs.append(next_config)

            models_fitted_this_iteration = 0
            start_time_this_iteration = time.time()
            for next_config in next_configs:
                x_runtime = impute_inactive_values(next_config)
                x_runtime = impute_inactive_values(x_runtime).get_array()
                # predicted_runtime = runtime_rf.predict_marginalized_over_instances(
                #     x_runtime.reshape((1, -1)))
                # predicted_runtime = np.exp(predicted_runtime[0][0][0]) - 1

                self.logger.info(
                    "Starting to evaluate %d. configuration (from "
                    "SMAC) with time limit %ds.", num_run,
                    self.func_eval_time_limit)
                self.logger.info(next_config)
                self.reset_data_manager()
                info = eval_with_limits(self.datamanager, self.tmp_dir,
                                        next_config, seed, num_run,
                                        self.resampling_strategy,
                                        self.resampling_strategy_args,
                                        self.memory_limit,
                                        self.func_eval_time_limit)
                (duration, result, _, additional_run_info, status) = info
                run_history.add(config=next_config,
                                cost=result,
                                time=duration,
                                status=status,
                                instance_id=instance_id,
                                seed=seed)
                run_history.update_cost(next_config, result)

                #self.logger.info('Predicted runtime %g, true runtime %g',
                #                 predicted_runtime, duration)

                # TODO add unittest to make sure everything works fine and
                # this does not get outdated!
                if smac.incumbent is None:
                    smac.incumbent = next_config
                elif result < run_history.get_cost(smac.incumbent):
                    smac.incumbent = next_config

                self.logger.info(
                    "Finished evaluating %d. configuration. "
                    "Duration: %f; loss: %f; status %s; additional "
                    "run info: %s ", num_run, duration, result, str(status),
                    additional_run_info)
                smac_iter += 1
                num_run += 1

                models_fitted_this_iteration += 1
                time_used_this_iteration = time.time(
                ) - start_time_this_iteration
                if models_fitted_this_iteration >= 2 and \
                        time_for_choose_next > 0 and \
                        time_used_this_iteration > time_for_choose_next:
                    break
                elif time_for_choose_next <= 0 and \
                        models_fitted_this_iteration >= 1:
                    break
                elif models_fitted_this_iteration >= 50:
                    break

                if max_iters is not None:
                    finished = (smac_iter < max_iters)

            if self.scenario.shared_model:
                pSMAC.write(run_history=run_history,
                            output_directory=self.scenario.output_dir,
                            num_run=self.seed)

Beispiel #2

Datei anzeigen

Datei: metalearner.py Projekt: wanjinchang/auto-sklearn

class MetaLearningOptimizer(object):
    def __init__(self,
                 dataset_name,
                 configuration_space,
                 aslib_directory,
                 distance='l1',
                 seed=None,
                 use_features='',
                 distance_kwargs=None,
                 subset='all'):
        """Metalearning optimizer.

        Parameters
        ----------
        dataset_name : str
            Name of the dataset

        configuration_space : HPOlibConfigSpace.configuration_space.ConfigurationSpace

        datasets_file : str
            Path to an aslib directory

        distance : str, "l1" or "l2" or "random"
            Distance function to be used by the kNearestDatasets algorithm.

        seed

        use_features

        metric_kwargs

        subset
        """
        self.dataset_name = dataset_name
        self.configuration_space = configuration_space
        self.aslib_dir = aslib_directory
        self.distance = distance
        self.seed = seed
        self.use_features = use_features
        self.distance_kwargs = distance_kwargs
        self.subset = subset
        self.kND = None  # For caching, makes things faster...

        self.meta_base = MetaBase(configuration_space, self.aslib_dir)
        self.logger = logging.getLogger(__name__)

    def perform_sequential_optimization(self,
                                        target_algorithm=test_function,
                                        time_budget=None,
                                        evaluation_budget=None):
        raise NotImplementedError("Right now this is not implemented due to "
                                  "timing issues.")
        time_taken = 0
        num_evaluations = 0
        history = []

        self.logger.info("Taking distance measure %s" % self.distance)
        while True:
            if time_budget is not None and time_taken >= time_budget:
                self.logger.info("Reached time budget. Exiting optimization.")
                break
            if evaluation_budget is not None and \
                    num_evaluations >= evaluation_budget:
                self.logger.info(
                    "Reached maximum number of evaluations. Exiting "
                    "optimization.")
                break

            params = self.metalearning_suggest(history)

            fixed_params = OrderedDict()
            # Hack to remove all trailing - from the params which are
            # accidently in the experiment pickle of the current HPOlib version
            for key in params:
                if key[0] == "-":
                    fixed_params[key[1:]] = params[key]
                else:
                    fixed_params[key] = params[key]

            self.logger.info(
                "%d/%d, parameters: %s" %
                (num_evaluations, evaluation_budget, str(fixed_params)))
            result = target_algorithm(fixed_params)
            history.append(Run(params, result))
            num_evaluations += 1

        return min([run.result for run in history])

    def metalearning_suggest_all(self, exclude_double_configurations=True):
        """Return a list of the best hyperparameters of neighboring datasets"""
        # TODO check if _learn was called before!
        neighbors = self._learn(exclude_double_configurations)
        hp_list = []
        for neighbor in neighbors:
            try:
                configuration = \
                    self.meta_base.get_configuration_from_algorithm_index(
                    neighbor[2])
                self.logger.info("%s %s %s" %
                                 (neighbor[0], neighbor[1], configuration))
            except (KeyError):
                self.logger.warning("Configuration %s not found" % neighbor[2])
                continue

            hp_list.append(configuration)
        return hp_list

    def metalearning_suggest(self, history):
        """Suggest the next most promising hyperparameters which were not yet evaluated"""
        # TODO test the object in the history!
        neighbors = self._learn()
        # Iterate over all datasets which are sorted ascending by distance

        history_with_indices = []
        for run in history:
            history_with_indices.append(\
                self.meta_base.get_algorithm_index_from_configuration(run))

        for idx, neighbor in enumerate(neighbors):
            already_evaluated = False
            # Check if that dataset was already evaluated
            for run in history_with_indices:
                # If so, return to the outer loop

                if neighbor[2] == run:
                    already_evaluated = True
                    break

            if not already_evaluated:
                self.logger.info(
                    "Nearest dataset with hyperparameters of best value "
                    "not evaluated yet is %s with a distance of %f" %
                    (neighbor[0], neighbor[1]))
                return self.meta_base.get_configuration_from_algorithm_index(
                    neighbor[2])
        raise StopIteration("No more values available.")

    def _learn(self, exclude_double_configurations=True):
        dataset_metafeatures, all_other_metafeatures = self._get_metafeatures()

        # Remove metafeatures which could not be calculated for the target
        # dataset
        keep = []
        for idx in dataset_metafeatures.index:
            if np.isfinite(dataset_metafeatures.loc[idx]):
                keep.append(idx)

        dataset_metafeatures = dataset_metafeatures.loc[keep]
        all_other_metafeatures = all_other_metafeatures.loc[:, keep]

        # Do mean imputation of all other metafeatures
        all_other_metafeatures = all_other_metafeatures.fillna(
            all_other_metafeatures.mean())

        if self.kND is None:
            # In case that we learn our distance function, get_value the parameters for
            #  the random forest
            if self.distance_kwargs:
                rf_params = ast.literal_eval(self.distance_kwargs)
            else:
                rf_params = None

            # To keep the distance the same in every iteration, we create a new
            # random state
            random_state = sklearn.utils.check_random_state(self.seed)
            kND = KNearestDatasets(metric=self.distance,
                                   random_state=random_state,
                                   metric_params=rf_params)

            runs = dict()
            # TODO move this code to the metabase
            for task_id in all_other_metafeatures.index:
                try:
                    runs[task_id] = self.meta_base.get_runs(task_id)
                except KeyError:
                    # TODO should I really except this?
                    self.logger.warning("Could not find runs for instance %s" %
                                        task_id)
                    runs[task_id] = pd.Series([], name=task_id)
            runs = pd.DataFrame(runs)

            kND.fit(all_other_metafeatures, runs)
            self.kND = kND
        return self.kND.kBestSuggestions(
            dataset_metafeatures,
            k=-1,
            exclude_double_configurations=exclude_double_configurations)

    def _get_metafeatures(self):
        """This is inside an extra function for testing purpose"""
        # Load the task

        self.logger.info("Going to use the metafeature subset: %s",
                         self.subset)
        all_metafeatures = self.meta_base.get_all_metafeatures()
        self.logger.info(" ".join(all_metafeatures.columns))

        # TODO: buggy and hacky, replace with a list seperated by commas
        if self.use_features and \
                (type(self.use_features) != str or self.use_features != ''):
            #ogger.warn("Going to keep the following features %s",
            #        str(self.use_features))
            if type(self.use_features) == str:
                use_features = self.use_features.split(",")
            elif type(self.use_features) in (list, np.ndarray):
                use_features = self.use_features
            else:
                raise NotImplementedError(type(self.use_features))

            if len(use_features) == 0:
                self.logger.info(
                    "You just tried to remove all metafeatures...")
            else:
                keep = [
                    col for col in all_metafeatures.columns
                    if col in use_features
                ]
                if len(use_features) == 0:
                    self.logger.info(
                        "You just tried to remove all metafeatures...")
                else:
                    all_metafeatures = all_metafeatures.loc[:, keep]
                    self.logger.info(
                        "Going to keep the following metafeatures:")
                    self.logger.info(str(keep))

        return self._split_metafeature_array(self.dataset_name,
                                             all_metafeatures)

    def _split_metafeature_array(self, dataset_name, metafeatures):
        """Split the metafeature array into dataset metafeatures and all other.

        This is inside an extra function for testing purpose.
        """
        dataset_metafeatures = metafeatures.loc[dataset_name].copy()
        metafeatures = metafeatures[metafeatures.index != dataset_name]
        return dataset_metafeatures, metafeatures

    def read_task_list(self, fh):
        dataset_filenames = list()
        for line in fh:
            line = line.replace("\n", "")
            if line:
                dataset_filenames.append(line)
            else:
                raise ValueError("Blank lines in the task list are not "
                                 "supported.")
        return dataset_filenames

    def read_experiments_list(self, fh):
        experiments_list = list()
        for line in fh.readlines():
            experiments_list.append(line.split())
        return experiments_list

Beispiel #3

Datei anzeigen

Datei: metalearner.py Projekt: Allen1203/auto-sklearn

class MetaLearningOptimizer(object):
    def __init__(self, dataset_name, configuration_space,
                 aslib_directory, distance='l1', seed=None, use_features='',
                 distance_kwargs=None, subset='all'):
        """Metalearning optimizer.

        Parameters
        ----------
        dataset_name : str
            Name of the dataset

        configuration_space : HPOlibConfigSpace.configuration_space.ConfigurationSpace

        datasets_file : str
            Path to an aslib directory

        distance : str, "l1" or "l2" or "random"
            Distance function to be used by the kNearestDatasets algorithm.

        seed

        use_features

        metric_kwargs

        subset
        """
        self.dataset_name = dataset_name
        self.configuration_space = configuration_space
        self.aslib_dir = aslib_directory
        self.distance = distance
        self.seed = seed
        self.use_features = use_features
        self.distance_kwargs = distance_kwargs
        self.subset = subset
        self.kND = None     # For caching, makes things faster...

        self.meta_base = MetaBase(configuration_space, self.aslib_dir)
        self.logger = logging.getLogger(__name__)

    def perform_sequential_optimization(self, target_algorithm=test_function,
                                        time_budget=None,
                                        evaluation_budget=None):
        raise NotImplementedError("Right now this is not implemented due to "
                                  "timing issues.")
        time_taken = 0
        num_evaluations = 0
        history = []

        self.logger.info("Taking distance measure %s" % self.distance)
        while True:
            if time_budget is not None and time_taken >= time_budget:
                self.logger.info("Reached time budget. Exiting optimization.")
                break
            if evaluation_budget is not None and \
                    num_evaluations >= evaluation_budget:
                self.logger.info("Reached maximum number of evaluations. Exiting "
                            "optimization.")
                break

            params = self.metalearning_suggest(history)

            fixed_params = OrderedDict()
            # Hack to remove all trailing - from the params which are
            # accidently in the experiment pickle of the current HPOlib version
            for key in params:
                if key[0] == "-":
                    fixed_params[key[1:]] = params[key]
                else:
                    fixed_params[key] = params[key]

            self.logger.info("%d/%d, parameters: %s" % (num_evaluations,
                                                   evaluation_budget,
                                                   str(fixed_params)))
            result = target_algorithm(fixed_params)
            history.append(Run(params, result))
            num_evaluations += 1

        return min([run.result for run in history])

    def metalearning_suggest_all(self, exclude_double_configurations=True):
        """Return a list of the best hyperparameters of neighboring datasets"""
        # TODO check if _learn was called before!
        neighbors = self._learn(exclude_double_configurations)
        hp_list = []
        for neighbor in neighbors:
            try:
                configuration = \
                    self.meta_base.get_configuration_from_algorithm_index(
                    neighbor[2])
                self.logger.info("%s %s %s" % (neighbor[0], neighbor[1], configuration))
            except (KeyError):
                self.logger.warning("Configuration %s not found" % neighbor[2])
                continue

            hp_list.append(configuration)
        return hp_list

    def metalearning_suggest(self, history):
        """Suggest the next most promising hyperparameters which were not yet evaluated"""
        # TODO test the object in the history!
        neighbors = self._learn()
        # Iterate over all datasets which are sorted ascending by distance

        history_with_indices = []
        for run in history:
            history_with_indices.append(\
                self.meta_base.get_algorithm_index_from_configuration(run))

        for idx, neighbor in enumerate(neighbors):
            already_evaluated = False
            # Check if that dataset was already evaluated
            for run in history_with_indices:
                # If so, return to the outer loop

                if neighbor[2] == run:
                    already_evaluated = True
                    break

            if not already_evaluated:
                self.logger.info("Nearest dataset with hyperparameters of best value "
                            "not evaluated yet is %s with a distance of %f" %
                            (neighbor[0], neighbor[1]))
                return self.meta_base.get_configuration_from_algorithm_index(
                    neighbor[2])
        raise StopIteration("No more values available.")

    def _learn(self, exclude_double_configurations=True):
        dataset_metafeatures, all_other_metafeatures = self._get_metafeatures()

        # Remove metafeatures which could not be calculated for the target
        # dataset
        keep = []
        for idx in dataset_metafeatures.index:
            if np.isfinite(dataset_metafeatures.loc[idx]):
               keep.append(idx)

        dataset_metafeatures = dataset_metafeatures.loc[keep]
        all_other_metafeatures = all_other_metafeatures.loc[:,keep]

        # Do mean imputation of all other metafeatures
        all_other_metafeatures = all_other_metafeatures.fillna(
            all_other_metafeatures.mean())

        if self.kND is None:
            # In case that we learn our distance function, get_value the parameters for
            #  the random forest
            if self.distance_kwargs:
                rf_params = ast.literal_eval(self.distance_kwargs)
            else:
                rf_params = None

            # To keep the distance the same in every iteration, we create a new
            # random state
            random_state = sklearn.utils.check_random_state(self.seed)
            kND = KNearestDatasets(metric=self.distance,
                                   random_state=random_state,
                                   metric_params=rf_params)

            runs = dict()
            # TODO move this code to the metabase
            for task_id in all_other_metafeatures.index:
                try:
                    runs[task_id] = self.meta_base.get_runs(task_id)
                except KeyError:
                    # TODO should I really except this?
                    self.logger.warning("Could not find runs for instance %s" % task_id)
                    runs[task_id] = pd.Series([], name=task_id)
            runs = pd.DataFrame(runs)

            kND.fit(all_other_metafeatures, runs)
            self.kND = kND
        return self.kND.kBestSuggestions(dataset_metafeatures, k=-1,
            exclude_double_configurations=exclude_double_configurations)

    def _get_metafeatures(self):
        """This is inside an extra function for testing purpose"""
        # Load the task

        self.logger.info("Going to use the metafeature subset: %s", self.subset)
        all_metafeatures = self.meta_base.get_all_metafeatures()
        self.logger.info(" ".join(all_metafeatures.columns))

        # TODO: buggy and hacky, replace with a list seperated by commas
        if self.use_features and \
                (type(self.use_features) != str or self.use_features != ''):
            #ogger.warn("Going to keep the following features %s",
            #        str(self.use_features))
            if type(self.use_features) == str:
                use_features = self.use_features.split(",")
            elif type(self.use_features) in (list, np.ndarray):
                use_features = self.use_features
            else:
                raise NotImplementedError(type(self.use_features))

            if len(use_features) == 0:
                self.logger.info("You just tried to remove all metafeatures...")
            else:
                keep = [col for col in all_metafeatures.columns if col in use_features]
                if len(use_features) == 0:
                    self.logger.info("You just tried to remove all metafeatures...")
                else:
                    all_metafeatures = all_metafeatures.loc[:,keep]
                    self.logger.info("Going to keep the following metafeatures:")
                    self.logger.info(str(keep))

        return self._split_metafeature_array(self.dataset_name, all_metafeatures)

    def _split_metafeature_array(self, dataset_name, metafeatures):
        """Split the metafeature array into dataset metafeatures and all other.

        This is inside an extra function for testing purpose.
        """
        dataset_metafeatures = metafeatures.loc[dataset_name].copy()
        metafeatures = metafeatures[metafeatures.index != dataset_name]
        return dataset_metafeatures, metafeatures

    def read_task_list(self, fh):
        dataset_filenames = list()
        for line in fh:
            line = line.replace("\n", "")
            if line:
                dataset_filenames.append(line)
            else:
                raise ValueError("Blank lines in the task list are not "
                                 "supported.")
        return dataset_filenames

    def read_experiments_list(self, fh):
        experiments_list = list()
        for line in fh.readlines():
            experiments_list.append(line.split())
        return experiments_list

Beispiel #4

Datei anzeigen

Datei: smbo.py Projekt: Ayaro/auto-sklearn

    def run_smbo(self, max_iters=1000):
        global evaluator

        # == first things first: load the datamanager
        self.reset_data_manager()
        
        # == Initialize SMBO stuff
        # first create a scenario
        seed = self.seed # TODO
        num_params = len(self.config_space.get_hyperparameters())
        # allocate a run history
        run_history = RunHistory()
        meta_runhistory = RunHistory()
        meta_runs_dataset_indices = {}
        num_run = self.start_num_run
        instance_id = self.dataset_name + SENTINEL

        # == Train on subset
        #    before doing anything, let us run the default_cfg
        #    on a subset of the available data to ensure that
        #    we at least have some models
        #    we will try three different ratios of decreasing magnitude
        #    in the hope that at least on the last one we will be able
        #    to get a model
        n_data = self.datamanager.data['X_train'].shape[0]
        subset_ratio = 10000. / n_data
        if subset_ratio >= 0.5:
            subset_ratio = 0.33
            subset_ratios = [subset_ratio, subset_ratio * 0.10]
        else:
            subset_ratios = [subset_ratio, 500. / n_data]
        self.logger.info("Training default configurations on a subset of "
                         "%d/%d data points." %
                         (int(n_data * subset_ratio), n_data))

        # the time limit for these function evaluations is rigorously
        # set to only 1/2 of a full function evaluation
        subset_time_limit = max(5, int(self.func_eval_time_limit / 2))
        # the configs we want to run on the data subset are:
        # 1) the default configs
        # 2) a set of configs we selected for training on a subset
        subset_configs = [self.config_space.get_default_configuration()] \
                          + self.collect_additional_subset_defaults()
        subset_config_succesful = [False] * len(subset_configs)
        for subset_config_id, next_config in enumerate(subset_configs):
            for i, ratio in enumerate(subset_ratios):
                self.reset_data_manager()
                n_data_subsample = int(n_data * ratio)

                # run the config, but throw away the result afterwards
                # since this cfg was evaluated only on a subset
                # and we don't want  to confuse SMAC
                self.logger.info("Starting to evaluate %d on SUBSET "
                                 "with size %d and time limit %ds.",
                                 num_run, n_data_subsample,
                                 subset_time_limit)
                self.logger.info(next_config)
                _info = eval_with_limits(
                    self.datamanager, self.tmp_dir, next_config,
                    seed, num_run,
                    self.resampling_strategy,
                    self.resampling_strategy_args,
                    self.memory_limit,
                    subset_time_limit, n_data_subsample)
                (duration, result, _, additional_run_info, status) = _info
                self.logger.info("Finished evaluating %d. configuration on SUBSET. "
                                 "Duration %f; loss %f; status %s; additional run "
                                 "info: %s ", num_run, duration, result,
                                 str(status), additional_run_info)

                num_run += 1
                if i < len(subset_ratios) - 1:
                    if status != StatusType.SUCCESS:
                        # Do not increase num_run here, because we will try
                        # the same configuration with less data
                        self.logger.info("A CONFIG did not finish "
                                         " for subset ratio %f -> going smaller",
                                         ratio)
                        continue
                    else:
                        self.logger.info("Finished SUBSET training sucessfully "
                                         "with ratio %f", ratio)
                        subset_config_succesful[subset_config_id] = True
                        break
                else:
                    if status != StatusType.SUCCESS:
                        self.logger.info("A CONFIG did not finish "
                                         " for subset ratio %f.",
                                         ratio)
                        continue
                    else:
                        self.logger.info("Finished SUBSET training sucessfully "
                                         "with ratio %f", ratio)
                        subset_config_succesful[subset_config_id] = True
                        break

        # Use the first non-failing configuration from the subsets as the new
        #  default configuration -> this guards us against the random forest
        # failing on large, sparse datasets
        default_cfg = None
        for subset_config_id, next_config in enumerate(subset_configs):
            if subset_config_succesful[subset_config_id]:
                default_cfg = next_config
                break
        if default_cfg is None:
            default_cfg = self.config_space.get_default_configuration()

        # == METALEARNING suggestions
        # we start by evaluating the defaults on the full dataset again
        # and add the suggestions from metalearning behind it

        if self.metadata_directory is None:
            metalearning_directory = os.path.dirname(
                autosklearn.metalearning.__file__)
            # There is no multilabel data in OpenML
            if self.task == MULTILABEL_CLASSIFICATION:
                meta_task = BINARY_CLASSIFICATION
            else:
                meta_task = self.task
            metadata_directory = os.path.join(
                metalearning_directory, 'files',
                '%s_%s_%s' % (METRIC_TO_STRING[self.metric],
                              TASK_TYPES_TO_STRING[meta_task],
                              'sparse' if self.datamanager.info['is_sparse']
                              else 'dense'))
            self.metadata_directory = metadata_directory

        self.logger.info('Metadata directory: %s', self.metadata_directory)
        meta_base = MetaBase(self.config_space, self.metadata_directory)

        metafeature_calculation_time_limit = int(
            self.total_walltime_limit / 4)
        metafeature_calculation_start_time = time.time()
        meta_features = self._calculate_metafeatures_with_limits(
            metafeature_calculation_time_limit)
        metafeature_calculation_end_time = time.time()
        metafeature_calculation_time_limit = \
            metafeature_calculation_time_limit - (
            metafeature_calculation_end_time -
            metafeature_calculation_start_time)

        if metafeature_calculation_time_limit < 1:
            self.logger.warning('Time limit for metafeature calculation less '
                                'than 1 seconds (%f). Skipping calculation '
                                'of metafeatures for encoded dataset.',
                                metafeature_calculation_time_limit)
            meta_features_encoded = None
        else:
            self.datamanager.perform1HotEncoding()
            meta_features_encoded = \
                self._calculate_metafeatures_encoded_with_limits(
                    metafeature_calculation_time_limit)

        # In case there is a problem calculating the encoded meta-features
        if meta_features is None:
            if meta_features_encoded is not None:
                meta_features = meta_features_encoded
        else:
            if meta_features_encoded is not None:
                meta_features.metafeature_values.update(
                    meta_features_encoded.metafeature_values)

        if meta_features is not None:
            meta_base.add_dataset(instance_id, meta_features)
            # Do mean imputation of the meta-features - should be done specific
            # for each prediction model!
            all_metafeatures = meta_base.get_metafeatures(
                features=list(meta_features.keys()))
            all_metafeatures.fillna(all_metafeatures.mean(), inplace=True)

            metalearning_configurations = self.collect_metalearning_suggestions(
                meta_base)
            if metalearning_configurations is None:
                metalearning_configurations = []
            self.reset_data_manager()

            self.logger.info('%s', meta_features)

            # Convert meta-features into a dictionary because the scenario
            # expects a dictionary
            meta_features_dict = {}
            for dataset, series in all_metafeatures.iterrows():
                meta_features_dict[dataset] = series.values
            meta_features_list = []
            for meta_feature_name in all_metafeatures.columns:
                meta_features_list.append(meta_features[meta_feature_name].value)
            meta_features_list = np.array(meta_features_list).reshape((1, -1))
            self.logger.info(list(meta_features_dict.keys()))

            meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
            meta_runs_index = 0
            try:
                meta_durations = meta_base.get_all_runs('runtime')
                read_runtime_data = True
            except KeyError:
                read_runtime_data = False
                self.logger.critical('Cannot read runtime data.')
                if self.acquisition_function == 'EIPS':
                    self.logger.critical('Reverting to acquisition function EI!')
                    self.acquisition_function = 'EI'

            for meta_dataset in meta_runs.index:
                meta_dataset_start_index = meta_runs_index
                for meta_configuration in meta_runs.columns:
                    if np.isfinite(meta_runs.loc[meta_dataset, meta_configuration]):
                        try:
                            config = meta_base.get_configuration_from_algorithm_index(
                                meta_configuration)
                            cost = meta_runs.loc[meta_dataset, meta_configuration]
                            if read_runtime_data:
                                runtime = meta_durations.loc[meta_dataset,
                                                             meta_configuration]
                            else:
                                runtime = 1
                            # TODO read out other status types!
                            meta_runhistory.add(config, cost, runtime,
                                                StatusType.SUCCESS,
                                                instance_id=meta_dataset)
                            meta_runs_index += 1
                        except:
                            # TODO maybe add warning
                            pass

                meta_runs_dataset_indices[meta_dataset] = (
                    meta_dataset_start_index, meta_runs_index)
        else:
            if self.acquisition_function == 'EIPS':
                self.logger.critical('Reverting to acquisition function EI!')
                self.acquisition_function = 'EI'
            meta_features_list = []
            meta_features_dict = {}
            metalearning_configurations = []

        self.scenario = AutoMLScenario(self.config_space,
                                       self.total_walltime_limit,
                                       self.func_eval_time_limit,
                                       meta_features_dict,
                                       self.tmp_dir,
                                       self.shared_mode)

        types = get_types(self.config_space, self.scenario.feature_array)
        if self.acquisition_function == 'EI':
            rh2EPM = RunHistory2EPM4Cost(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=None,
                                         impute_censored_data=False,
                                         impute_state=None)
            model = RandomForestWithInstances(types,
                                              instance_features=meta_features_list,
                                              seed=1, num_trees=10)
            smac = SMBO(self.scenario, model=model,
                        rng=seed)
        elif self.acquisition_function == 'EIPS':
            rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=None,
                                         impute_censored_data=False,
                                         impute_state=None)
            model = UncorrelatedMultiObjectiveRandomForestWithInstances(
                ['cost', 'runtime'], types, num_trees = 10,
                instance_features=meta_features_list, seed=1)
            acquisition_function = EIPS(model)
            smac = SMBO(self.scenario,
                        acquisition_function=acquisition_function,
                        model=model, runhistory2epm=rh2EPM, rng=seed)
        else:
            raise ValueError('Unknown acquisition function value %s!' %
                             self.acquisition_function)

        # Build a runtime model
        # runtime_rf = RandomForestWithInstances(types,
        #                                        instance_features=meta_features_list,
        #                                        seed=1, num_trees=10)
        # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
        #                                      scenario=self.scenario,
        #                                      success_states=None,
        #                                      impute_censored_data=False,
        #                                      impute_state=None)
        # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
        # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
        X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
        # Transform Y_meta on a per-dataset base
        for meta_dataset in meta_runs_dataset_indices:
            start_index, end_index = meta_runs_dataset_indices[meta_dataset]
            end_index += 1  # Python indexing
            Y_meta[start_index:end_index, 0]\
                [Y_meta[start_index:end_index, 0] >2.0] =  2.0
            dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
            Y_meta[start_index:end_index, 0] = 1 - (
                (1. - Y_meta[start_index:end_index, 0]) /
                (1. - dataset_minimum))
            Y_meta[start_index:end_index, 0]\
                  [Y_meta[start_index:end_index, 0] > 2] = 2

        # == first, evaluate all metelearning and default configurations
        for i, next_config in enumerate(([default_cfg] +
                                          metalearning_configurations)):
            # Do not evaluate default configurations more than once
            if i >= len([default_cfg]) and next_config in [default_cfg]:
                continue

            config_name = 'meta-learning' if i >= len([default_cfg]) \
                else 'default'

            self.logger.info("Starting to evaluate %d. configuration "
                             "(%s configuration) with time limit %ds.",
                             num_run, config_name, self.func_eval_time_limit)
            self.logger.info(next_config)
            self.reset_data_manager()
            info = eval_with_limits(self.datamanager, self.tmp_dir, next_config,
                                    seed, num_run,
                                    self.resampling_strategy,
                                    self.resampling_strategy_args,
                                    self.memory_limit,
                                    self.func_eval_time_limit)
            (duration, result, _, additional_run_info, status) = info
            run_history.add(config=next_config, cost=result,
                            time=duration , status=status,
                            instance_id=instance_id, seed=seed)
            run_history.update_cost(next_config, result)
            self.logger.info("Finished evaluating %d. configuration. "
                             "Duration %f; loss %f; status %s; additional run "
                             "info: %s ", num_run, duration, result,
                             str(status), additional_run_info)
            num_run += 1
            if smac.incumbent is None:
                smac.incumbent = next_config
            elif result < run_history.get_cost(smac.incumbent):
                smac.incumbent = next_config

            if self.scenario.shared_model:
                pSMAC.write(run_history=run_history,
                            output_directory=self.scenario.output_dir,
                            num_run=self.seed)

        # == after metalearning run SMAC loop
        smac.runhistory = run_history
        smac_iter = 0
        finished = False
        while not finished:
            if self.scenario.shared_model:
                pSMAC.read(run_history=run_history,
                           output_directory=self.scenario.output_dir,
                           configuration_space=self.config_space,
                           logger=self.logger)

            next_configs = []
            time_for_choose_next = -1
            try:
                X_cfg, Y_cfg = rh2EPM.transform(run_history)

                if not run_history.empty():
                    # Update costs by normalization
                    dataset_minimum = np.min(Y_cfg[:, 0])
                    Y_cfg[:, 0] = 1 - ((1. - Y_cfg[:, 0]) /
                                       (1. - dataset_minimum))
                    Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2

                if len(X_meta) > 0 and len(X_cfg) > 0:
                    pass
                    #X_cfg = np.concatenate((X_meta, X_cfg))
                    #Y_cfg = np.concatenate((Y_meta, Y_cfg))
                elif len(X_meta) > 0:
                    X_cfg = X_meta.copy()
                    Y_cfg = Y_meta.copy()
                elif len(X_cfg) > 0:
                    X_cfg = X_cfg.copy()
                    Y_cfg = Y_cfg.copy()
                else:
                    raise ValueError('No training data for SMAC random forest!')

                self.logger.info('Using %d training points for SMAC.' %
                                 X_cfg.shape[0])
                choose_next_start_time = time.time()
                next_configs_tmp = smac.choose_next(X_cfg, Y_cfg,
                                                    num_interleaved_random=110,
                                                    num_configurations_by_local_search=10,
                                                    num_configurations_by_random_search_sorted=100)
                time_for_choose_next = time.time() - choose_next_start_time
                self.logger.info('Used %g seconds to find next '
                                 'configurations' % (time_for_choose_next))
                next_configs.extend(next_configs_tmp)
            # TODO put Exception here!
            except Exception as e:
                self.logger.error(e)
                self.logger.error("Error in getting next configurations "
                                  "with SMAC. Using random configuration!")
                next_config = self.config_space.sample_configuration()
                next_configs.append(next_config)

            models_fitted_this_iteration = 0
            start_time_this_iteration = time.time()
            for next_config in next_configs:
                x_runtime = impute_inactive_values(next_config)
                x_runtime = impute_inactive_values(x_runtime).get_array()
                # predicted_runtime = runtime_rf.predict_marginalized_over_instances(
                #     x_runtime.reshape((1, -1)))
                # predicted_runtime = np.exp(predicted_runtime[0][0][0]) - 1

                self.logger.info("Starting to evaluate %d. configuration (from "
                                 "SMAC) with time limit %ds.", num_run,
                                 self.func_eval_time_limit)
                self.logger.info(next_config)
                self.reset_data_manager()
                info = eval_with_limits(self.datamanager, self.tmp_dir, next_config,
                                        seed, num_run,
                                        self.resampling_strategy,
                                        self.resampling_strategy_args,
                                        self.memory_limit,
                                        self.func_eval_time_limit)
                (duration, result, _, additional_run_info, status) = info
                run_history.add(config=next_config, cost=result,
                                time=duration , status=status,
                                instance_id=instance_id, seed=seed)
                run_history.update_cost(next_config, result)

                #self.logger.info('Predicted runtime %g, true runtime %g',
                #                 predicted_runtime, duration)

                # TODO add unittest to make sure everything works fine and
                # this does not get outdated!
                if smac.incumbent is None:
                    smac.incumbent = next_config
                elif result < run_history.get_cost(smac.incumbent):
                    smac.incumbent = next_config

                self.logger.info("Finished evaluating %d. configuration. "
                                 "Duration: %f; loss: %f; status %s; additional "
                                 "run info: %s ", num_run, duration, result,
                                 str(status), additional_run_info)
                smac_iter += 1
                num_run += 1

                models_fitted_this_iteration += 1
                time_used_this_iteration = time.time() - start_time_this_iteration
                if models_fitted_this_iteration >= 2 and \
                        time_for_choose_next > 0 and \
                        time_used_this_iteration > time_for_choose_next:
                    break
                elif time_for_choose_next <= 0 and \
                        models_fitted_this_iteration >= 1:
                    break
                elif models_fitted_this_iteration >= 50:
                    break

                if max_iters is not None:
                    finished = (smac_iter < max_iters)

            if self.scenario.shared_model:
                pSMAC.write(run_history=run_history,
                            output_directory=self.scenario.output_dir,
                            num_run=self.seed)