Ejemplo n.º 1
0
def get_hpo_cs(estimator_id, task_type=REGRESSION):
    """Return the hyperparameter search space of a regression estimator.

    Args:
        estimator_id: Key of the estimator in the candidate table built from
            ``_regressors`` and ``_addons``.
        task_type: Not used in this function.

    Returns:
        The result of the estimator class's
        ``get_hyperparameter_search_space()``.

    Raises:
        ValueError: If ``estimator_id`` is not among the candidates.
    """
    available = get_combined_candidtates(_regressors, _addons)
    # Reject unknown estimators up front so the happy path stays flat.
    if estimator_id not in available:
        raise ValueError("Algorithm %s not supported!" % estimator_id)
    return available[estimator_id].get_hyperparameter_search_space()
Ejemplo n.º 2
0
def get_hpo_cs(estimator_id, task_type=CLASSIFICATION):
    """Return the hyperparameter search space of a classification estimator.

    Args:
        estimator_id: Key of the estimator in the candidate table built from
            ``_classifiers`` and ``_addons``.
        task_type: Not used in this function.

    Returns:
        The result of the estimator class's
        ``get_hyperparameter_search_space()``.

    Raises:
        ValueError: If ``estimator_id`` is not among the candidates.
    """
    available = get_combined_candidtates(_classifiers, _addons)
    # Reject unknown estimators up front so the happy path stays flat.
    if estimator_id not in available:
        raise ValueError("Algorithm %s not supported!" % estimator_id)
    return available[estimator_id].get_hyperparameter_search_space()
Ejemplo n.º 3
0
def get_estimator(config, estimator_id):
    """Instantiate a classifier from a hyperparameter configuration.

    Args:
        config: Mapping of constructor keyword arguments; it is copied, so
            the caller's object is not modified.
        estimator_id: Key of the classifier in the candidate table built
            from ``_classifiers`` and ``_addons``.

    Returns:
        A ``(estimator_id, estimator)`` tuple.
    """
    from solnml.components.models.classification import _classifiers, _addons
    registry = get_combined_candidtates(_classifiers, _addons)

    # Work on a copy and pin the random state to a fixed value.
    params = config.copy()
    params['random_state'] = 1
    estimator = registry[estimator_id](**params)

    # Force single-job execution on estimators that expose ``n_jobs``.
    if hasattr(estimator, 'n_jobs'):
        estimator.n_jobs = 1
    return estimator_id, estimator
Ejemplo n.º 4
0
def get_estimator(config, estimator_id):
    """Instantiate a regressor from a prefixed hyperparameter configuration.

    Keys in ``config`` are expected in ``"<estimator_id>:<name>"`` form.
    Only entries whose first ``:``-separated segment equals
    ``estimator_id`` are kept, and the second segment becomes the
    constructor keyword.

    Args:
        config: Mapping of prefixed hyperparameter names to values; it is
            copied, so the caller's object is not modified.
        estimator_id: Key of the regressor in the candidate table built from
            ``_regressors`` and ``_addons``.

    Returns:
        A ``(estimator_id, estimator)`` tuple.
    """
    prefixed = config.copy()
    # Pin the random state under the estimator's own prefix.
    prefixed['%s:random_state' % estimator_id] = 1

    # Strip the "<estimator_id>:" prefix from the matching entries.
    kwargs = dict()
    for full_name in prefixed:
        segments = full_name.split(':')
        if segments[0] != estimator_id:
            continue
        kwargs[segments[1]] = prefixed[full_name]

    registry = get_combined_candidtates(_regressors, _addons)
    estimator = registry[estimator_id](**kwargs)

    # Force single-job execution on estimators that expose ``n_jobs``.
    if hasattr(estimator, 'n_jobs'):
        estimator.n_jobs = 1
    return estimator_id, estimator
Ejemplo n.º 5
0
def get_cash_cs(include_algorithms=None, task_type=CLASSIFICATION):
    """Build the joint algorithm-selection and hyperparameter search space.

    The space has one categorical hyperparameter ``'algorithm'`` whose
    choices are the selected estimators. Each estimator's own search space
    (from ``get_hpo_cs``) is attached under its name and conditioned on
    that estimator being chosen.

    Args:
        include_algorithms: Optional iterable of estimator names to restrict
            the space to. Names that are not known candidates are ignored.
            ``None`` means all candidates.
        task_type: Not used in this function.

    Returns:
        The assembled ``ConfigurationSpace``.

    Raises:
        ValueError: If ``include_algorithms`` is given but none of its
            entries is a known candidate.
    """
    registry = get_combined_candidtates(_classifiers, _addons)
    if include_algorithms is None:
        selected = list(registry)
    else:
        requested = set(include_algorithms)
        # Filter in the registry's own order rather than iterating a set:
        # set order for strings can differ between interpreter runs, which
        # would make the choice order of the categorical unstable.
        selected = [name for name in registry if name in requested]
        if not selected:
            raise ValueError(
                "No algorithms included! Please check the spelling of the included algorithms!"
            )

    cs = ConfigurationSpace()
    algo = CategoricalHyperparameter('algorithm', selected)
    cs.add_hyperparameter(algo)
    for estimator_id in selected:
        estimator_cs = get_hpo_cs(estimator_id)
        # The sub-space is only active when this estimator is the chosen one.
        parent_hyperparameter = {'parent': algo, 'value': estimator_id}
        cs.add_configuration_space(estimator_id,
                                   estimator_cs,
                                   parent_hyperparameter=parent_hyperparameter)
    return cs
Ejemplo n.º 6
0
    def __init__(self,
                 task_type,
                 estimator_id: str,
                 data: DataNode,
                 metric,
                 include_preprocessors=None,
                 share_fe=False,
                 output_dir='logs',
                 per_run_time_limit=120,
                 per_run_mem_limit=5120,
                 dataset_id='default',
                 eval_type='holdout',
                 mth='rb',
                 sw_size=3,
                 n_jobs=1,
                 seed=1,
                 enable_fe=True,
                 fe_algo='bo',
                 number_of_unit_resource=2,
                 total_resource=30,
                 timestamp=None):
        """Set up bandit state, search spaces, evaluators and optimizers.

        Args:
            task_type: Task identifier; must be in ``CLS_TASKS`` or
                ``RGS_TASKS``.
            estimator_id: Name of the estimator, looked up in the combined
                candidate table for the task.
            data: Input data node; a copy (``data.copy_()``) is kept as
                ``self.original_data``.
            metric: Scorer handed to the evaluators.
            include_preprocessors: Forwarded to the feature-engineering
                search-space builders.
            share_fe: Stored as ``self.share_fe``; not otherwise read here.
            output_dir: Forwarded to the evaluators and optimizers.
            per_run_time_limit: Forwarded to the optimizers.
            per_run_mem_limit: Forwarded to the FE optimizer only.
            dataset_id: Used only to build the logger name.
            eval_type: Stored as ``self.evaluation_type`` and passed to the
                evaluators as the resampling strategy.
            mth: ``'combined'`` builds a single joint optimizer; any other
                value builds separate ``'fe'`` and ``'hpo'`` optimizers.
            sw_size: Stored as ``self.sliding_window_size``.
            n_jobs: Forwarded to the optimizers.
            seed: Seeds the config space and is passed to the separate
                evaluators and to the optimizers.
            enable_fe: Stored as ``self.enable_fe`` only when
                ``mth != 'combined'``.
            fe_algo: Not read in this constructor.
            number_of_unit_resource: Multiplied by ``one_unit_of_resource``
                (5) to obtain the number of trials per iteration.
            total_resource: Stored as ``self.total_resource``.
            timestamp: Forwarded to the evaluators.

        Raises:
            ValueError: If ``estimator_id`` is not a known candidate, or if
                ``task_type`` is in neither task group.
        """
        self.task_type = task_type
        self.metric = metric
        self.number_of_unit_resource = number_of_unit_resource
        # One unit of resource is a fixed batch of trials; trials per
        # iteration = one_unit_of_resource * number_of_unit_resource.
        self.one_unit_of_resource = 5
        self.total_resource = total_resource
        self.per_run_time_limit = per_run_time_limit
        self.per_run_mem_limit = per_run_mem_limit
        self.estimator_id = estimator_id
        self.include_preprocessors = include_preprocessors
        self.evaluation_type = eval_type
        # Keep a private copy so later processing does not touch the caller's node.
        self.original_data = data.copy_()
        self.share_fe = share_fe
        self.output_dir = output_dir
        self.n_jobs = n_jobs
        self.mth = mth
        self.seed = seed
        self.sliding_window_size = sw_size
        # The logger name encodes dataset, seed and estimator.
        task_id = '%s-%d-%s' % (dataset_id, seed, estimator_id)
        self.logger = get_logger(self.__class__.__name__ + '-' + task_id)

        # Bandit settings.
        # self.arms = ['fe', 'hpo']
        self.arms = ['hpo', 'fe']
        # Per-arm bookkeeping, keyed by arm name ('fe' / 'hpo').
        self.rewards = dict()
        self.optimizer = dict()
        self.evaluation_cost = dict()
        self.update_flag = dict()
        # Global incumbent.
        self.inc = dict()
        self.local_inc = dict()
        self.local_hist = {'fe': [], 'hpo': []}
        self.inc_record = {'fe': list(), 'hpo': list()}
        self.exp_output = dict()
        self.eval_dict = {'fe': dict(), 'hpo': dict()}
        for arm in self.arms:
            self.rewards[arm] = list()
            self.update_flag[arm] = False
            self.evaluation_cost[arm] = list()
            self.exp_output[arm] = dict()
        self.pull_cnt = 0
        self.action_sequence = list()
        self.final_rewards = list()
        self.incumbent_config = None
        # Starts at -inf so any real performance value compares greater.
        self.incumbent_perf = float("-INF")
        self.early_stopped_flag = False
        self.first_start = True

        # Flags for whether the data declares text / image feature types.
        self.include_text = True if TEXT in self.original_data.feature_types else False
        self.include_image = True if IMAGE in self.original_data.feature_types else False

        # Fetch hyperparameter space.
        if self.task_type in CLS_TASKS:
            from solnml.components.models.classification import _classifiers, _addons
            _candidates = get_combined_candidtates(_classifiers, _addons)
            if estimator_id in _candidates:
                clf_class = _candidates[estimator_id]
            else:
                raise ValueError("Algorithm %s not supported!" % estimator_id)
            cs = clf_class.get_hyperparameter_search_space()
        elif self.task_type in RGS_TASKS:
            from solnml.components.models.regression import _regressors, _addons
            _candidates = get_combined_candidtates(_regressors, _addons)
            if estimator_id in _candidates:
                reg_class = _candidates[estimator_id]
            else:
                raise ValueError("Algorithm %s not supported!" % estimator_id)
            cs = reg_class.get_hyperparameter_search_space()
        else:
            raise ValueError("Unknown task type %s!" % self.task_type)

        self.config_space = cs
        # NOTE(review): the default configuration is fetched before the
        # space is seeded - confirm this ordering is intentional.
        self.default_config = cs.get_default_configuration()
        self.config_space.seed(self.seed)

        self.if_imbal = is_imbalanced_dataset(self.original_data)

        # Search space for the feature-engineering side.
        self.fe_config_space = get_task_hyperparameter_space(
            self.task_type,
            self.estimator_id,
            include_preprocessors=self.include_preprocessors,
            include_text=self.include_text,
            include_image=self.include_image,
            if_imbal=self.if_imbal)
        self.fe_default_config = self.fe_config_space.get_default_configuration(
        )

        self.timestamp = timestamp
        # Build one evaluator per arm; the two differ only in ``name``.
        if self.task_type in CLS_TASKS:
            fe_evaluator = ClassificationEvaluator(
                self.default_config,
                self.fe_default_config,
                estimator_id,
                scorer=self.metric,
                data_node=self.original_data,
                name='fe',
                resampling_strategy=self.evaluation_type,
                if_imbal=self.if_imbal,
                seed=self.seed,
                output_dir=self.output_dir,
                timestamp=self.timestamp)
            hpo_evaluator = ClassificationEvaluator(
                self.default_config,
                self.fe_default_config,
                estimator_id,
                scorer=self.metric,
                data_node=self.original_data,
                name='hpo',
                resampling_strategy=self.evaluation_type,
                if_imbal=self.if_imbal,
                seed=self.seed,
                output_dir=self.output_dir,
                timestamp=self.timestamp)

        elif self.task_type in RGS_TASKS:
            # Unlike the classification evaluators, no ``if_imbal`` is passed here.
            fe_evaluator = RegressionEvaluator(
                self.default_config,
                self.fe_default_config,
                estimator_id,
                scorer=self.metric,
                data_node=self.original_data,
                name='fe',
                resampling_strategy=self.evaluation_type,
                seed=self.seed,
                output_dir=self.output_dir,
                timestamp=self.timestamp)
            hpo_evaluator = RegressionEvaluator(
                self.default_config,
                self.fe_default_config,
                estimator_id,
                scorer=self.metric,
                data_node=self.original_data,
                name='hpo',
                resampling_strategy=self.evaluation_type,
                seed=self.seed,
                output_dir=self.output_dir,
                timestamp=self.timestamp)
        else:
            raise ValueError('Invalid task type!')

        if self.mth != 'combined':
            # Separate mode: one optimizer per arm, stored in the
            # ``self.optimizer`` dict under 'fe' and 'hpo'.
            self.enable_fe = enable_fe
            trials_per_iter = self.one_unit_of_resource * self.number_of_unit_resource

            self.optimizer['fe'] = build_fe_optimizer(
                self.evaluation_type,
                fe_evaluator,
                self.fe_config_space,
                per_run_time_limit=per_run_time_limit,
                per_run_mem_limit=per_run_mem_limit,
                inner_iter_num_per_iter=trials_per_iter,
                output_dir=output_dir,
                seed=self.seed,
                n_jobs=n_jobs)

            # Both the global and the local FE incumbent start at the default.
            self.inc['fe'], self.local_inc[
                'fe'] = self.fe_default_config, self.fe_default_config

            # Build the HPO component.
            # trials_per_iter = max(len(self.optimizer['fe'].trans_types), 20)
            trials_per_iter = self.one_unit_of_resource * self.number_of_unit_resource

            self.optimizer['hpo'] = build_hpo_optimizer(
                self.evaluation_type,
                hpo_evaluator,
                cs,
                output_dir=output_dir,
                per_run_time_limit=per_run_time_limit,
                inner_iter_num_per_iter=trials_per_iter,
                seed=self.seed,
                n_jobs=n_jobs)

            # Both the global and the local HPO incumbent start at the default.
            self.inc['hpo'], self.local_inc[
                'hpo'] = self.default_config, self.default_config
            self.init_config = cs.get_default_configuration()
            self.local_hist['fe'].append(self.fe_default_config)
            self.local_hist['hpo'].append(self.default_config)

        else:
            # Combined mode: the per-arm dicts set up above are replaced by
            # flat containers, and ``self.optimizer`` becomes one optimizer
            # object instead of a dict. The fe/hpo evaluators built above
            # are not used in this branch.
            self.rewards = list()
            self.evaluation_cost = list()
            self.eval_dict = {}
            trials_per_iter = self.one_unit_of_resource * self.number_of_unit_resource

            # Any task type that reaches this point and is not in CLS_TASKS
            # is in RGS_TASKS; other values raised earlier.
            if self.task_type in CLS_TASKS:
                from solnml.utils.combined_cls_evaluator import get_combined_cs
                from solnml.utils.combined_cls_evaluator import CombinedClassificationEvaluator as CombinedEvaluator
            else:
                from solnml.utils.combined_rgs_evaluator import get_combined_cs
                from solnml.utils.combined_rgs_evaluator import CombinedRegressionEvaluator as CombinedEvaluator

            self.evaluator = CombinedEvaluator(
                estimator_id,
                scorer=self.metric,
                data_node=self.original_data,
                if_imbal=self.if_imbal,
                timestamp=self.timestamp,
                output_dir=self.output_dir,
                resampling_strategy=self.evaluation_type)
            # Rebinds the local ``cs`` to the joint space; ``self.config_space``
            # still refers to the estimator-only space assigned earlier.
            cs = get_combined_cs(
                self.estimator_id,
                self.task_type,
                include_image=self.include_image,
                include_text=self.include_text,
                include_preprocessors=self.include_preprocessors,
                if_imbal=self.if_imbal)

            self.optimizer = build_hpo_optimizer(
                self.evaluation_type,
                self.evaluator,
                cs,
                output_dir=self.output_dir,
                per_run_time_limit=self.per_run_time_limit,
                inner_iter_num_per_iter=trials_per_iter,
                seed=self.seed,
                n_jobs=self.n_jobs)