Example #1
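# Tune a meta-learner: score the default configuration, then run 50 BO trials
# (at most 20 minutes each) and return the incumbent configuration.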
def tune_meta_learner():
    cs = build_configspace()
    def_value = obj_function(cs.get_default_configuration())
    print("Default Value: %.2f" % (def_value))

    bo = BayesianOptimization(obj_function, cs, max_runs=50, time_limit_per_trial=1200)
    bo.run()
    inc_value = bo.get_incumbent()
    config = inc_value[0][0]

    print('Best hyperparameter config found', config)
    return config
Example #2
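# Variant of Example #1: after 50 BO trials (at most 150 s each), the incumbent
# configuration is also pickled under meta_dir.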
def tune_meta_learner():
    cs = build_configspace()
    def_value = objective_function(cs.get_default_configuration())
    print("Default Value: %.2f" % (def_value))

    bo = BayesianOptimization(objective_function, cs, max_runs=50, time_limit_per_trial=150)
    bo.run()
    inc_value = bo.get_incumbent()
    config = inc_value[0][0]

    with open(meta_dir + 'meta_learner_%s_%s_%s_config.pkl' % (meta_algo, metric, hash_id), 'wb') as f:
        pk.dump(config, f)
    print('Best hyperparameter config found', config)
    return config
Example #3
    # HPO optimizer constructor: wraps the evaluator in a BO instance and estimates
    # the number of distinct configurations to cap the trial budget.
    def __init__(self, evaluator, config_space, time_limit=None, evaluation_limit=None,
                 per_run_time_limit=300, per_run_mem_limit=1024, output_dir='./',
                 trials_per_iter=1, seed=1, n_jobs=1):
        super().__init__(evaluator, config_space, seed)
        self.time_limit = time_limit
        self.evaluation_num_limit = evaluation_limit
        self.trials_per_iter = trials_per_iter
        self.per_run_time_limit = per_run_time_limit
        self.per_run_mem_limit = per_run_mem_limit
        self.output_dir = output_dir

        self.optimizer = BO(objective_function=self.evaluator,
                            config_space=config_space,
                            max_runs=int(1e10),
                            task_id=None,
                            time_limit_per_trial=self.per_run_time_limit,
                            rng=np.random.RandomState(self.seed))

        self.trial_cnt = 0
        self.configs = list()
        self.perfs = list()
        self.incumbent_perf = float("-INF")
        self.incumbent_config = self.config_space.get_default_configuration()
        # Estimate the size of the hyperparameter space.
        hp_num = len(self.config_space.get_hyperparameters())
        if hp_num == 0:
            self.config_num_threshold = 0
        else:
            _threshold = int(len(set(self.config_space.sample_configuration(10000))) * 0.75)
            self.config_num_threshold = _threshold
        self.logger.debug('The maximum trial number in HPO is: %d' % self.config_num_threshold)
        self.maximum_config_num = min(600, self.config_num_threshold)
        self.early_stopped_flag = False
        self.eval_dict = {}
Example #4
# Benchmark driver: the objective below trains a ResNet on the given dataset,
# appends every (config, validation error) pair to a pickle, and runs BO for trial_num trials.
def run(dataset_name):
    file_id = '%s-resnet-%s-%d.pkl' % (dataset_name, mode, trial_num)
    saved_file = os.path.join(data_dir, file_id)

    # (x_train, y_train), (x_test, y_test), cls_num = load_dataset(dataset_name)
    # print(x_train[0])
    # print(x_test[0])
    # print(x_train.shape)
    # print(x_test.shape)
    # print(y_train.shape)

    def objective_function(cfg):
        (x_train, y_train), (x_test, y_test), cls_num = load_dataset(dataset_name)
        epochs_num, run_count = get_default_setting(dataset_name)
        val_error = train(cls_num,
                          epochs_num,
                          cfg,
                          x_train,
                          y_train,
                          x_test,
                          y_test,
                          seed=32)
        print('the validation accuracy is ', 1 - val_error)

        if not os.path.exists(saved_file):
            data = list()
        else:
            with open(saved_file, 'rb') as f:
                data = pickle.load(f)
        data.append([cfg, val_error])

        with open(saved_file, 'wb') as f:
            pickle.dump(data, f)
        return val_error

    cs = create_configspace()
    bo = BO(objective_function,
            cs,
            max_runs=trial_num,
            time_limit_per_trial=10000,
            sample_strategy=mode,
            rng=np.random.RandomState(1))
    bo.run()
Example #5
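    # Constructor of a feature-engineering optimizer: it builds the task-specific
    # hyperparameter space and wraps a BO instance whose per-trial time limit
    # bounds each transformation trial.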
    def __init__(self,
                 task_type,
                 input_data: DataNode,
                 evaluator: _BaseEvaluator,
                 model_id: str,
                 time_limit_per_trans: int,
                 mem_limit_per_trans: int,
                 seed: int,
                 n_jobs=1,
                 number_of_unit_resource=1,
                 time_budget=600,
                 algo='smac'):
        super().__init__(str(__class__.__name__), task_type, input_data, seed)
        self.number_of_unit_resource = number_of_unit_resource
        self.iter_num_per_unit_resource = 10
        self.time_limit_per_trans = time_limit_per_trans
        self.mem_limit_per_trans = mem_limit_per_trans
        self.time_budget = time_budget
        self.evaluator = evaluator
        self.model_id = model_id

        self.incumbent_score = -np.inf
        self.fetch_incumbent = None
        self.baseline_score = -np.inf
        self.start_time = time.time()
        self.hp_config = None
        self.seed = seed
        self.n_jobs = n_jobs

        self.node_dict = dict()

        self.early_stopped_flag = False
        self.is_finished = False
        self.iteration_id = 0

        self.evaluator.parse_needed = True
        # Prepare the hyperparameter space.
        self.hyperparameter_space = self._get_task_hyperparameter_space(optimizer=algo)
        if algo == 'smac':
            self.incumbent_config = self.hyperparameter_space.get_default_configuration()
        else:
            self.incumbent_config = None
        self.optimizer = BO(objective_function=self.evaluate_function,
                            config_space=self.hyperparameter_space,
                            max_runs=int(1e10),
                            task_id=self.model_id,
                            time_limit_per_trial=self.time_limit_per_trans,
                            rng=np.random.RandomState(self.seed))
        self.eval_dict = {}
Example #6
# Tail of a fit() method from a random-forest wrapper: the keyword arguments below
# complete an estimator construction (truncated in the source), the model is fitted
# with warm_start, and the script at the bottom tunes it with BO on several datasets.
                min_impurity_decrease=self.min_impurity_decrease,
                random_state=self.random_state,
                n_jobs=self.n_jobs,
                class_weight=self.class_weight,
                warm_start=True)

        self.estimator.fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict(X)


dataset_list = dataset_str.split(',')
check_datasets(dataset_list)
cs = get_cs()

_run_count = min(int(len(set(cs.sample_configuration(30000))) * 0.75), run_count)
print(_run_count)

for dataset in dataset_list:
    node = load_data(dataset, '../soln-ml/', True, task_type=0)
    _x, _y = node.data[0], node.data[1]
    eval_fn = partial(eval_func, x=_x, y=_y)  # avoid shadowing the built-in eval
    bo = BO(eval_fn, cs, max_runs=_run_count, time_limit_per_trial=600, sample_strategy=mode, rng=np.random.RandomState(1))
    bo.run()
    with open('logs/%s-random_forest-%s-%d.pkl' % (dataset, mode, run_count), 'wb') as f:
        pickle.dump(bo.get_history().data, f)
Example #7
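# Compare plain BO, lite-BO, and transfer-learning BO (TLBO) on a classification
# task, then pickle the incumbent performance and run history.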
def evaluate(mode, dataset, run_id, metric):
    print(mode, dataset, run_id, metric)

    metric = get_metric(metric)
    train_data, test_data = load_train_test_data(dataset,
                                                 task_type=MULTICLASS_CLS)

    cs = _classifiers[algo_name].get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", algo_name)
    cs.add_hyperparameter(model)
    default_hpo_config = cs.get_default_configuration()

    fe_evaluator = ClassificationEvaluator(default_hpo_config,
                                           scorer=metric,
                                           name='fe',
                                           resampling_strategy='holdout',
                                           seed=1)

    hpo_evaluator = ClassificationEvaluator(default_hpo_config,
                                            scorer=metric,
                                            data_node=train_data,
                                            name='hpo',
                                            resampling_strategy='holdout',
                                            seed=1)

    fe_optimizer = BayesianOptimizationOptimizer(task_type=CLASSIFICATION,
                                                 input_data=train_data,
                                                 evaluator=fe_evaluator,
                                                 model_id=algo_name,
                                                 time_limit_per_trans=600,
                                                 mem_limit_per_trans=5120,
                                                 number_of_unit_resource=10,
                                                 seed=1)

    def objective_function(config):
        if benchmark == 'fe':
            return fe_optimizer.evaluate_function(config)
        else:
            return hpo_evaluator(config)

    if mode == 'bo':
        bo = BO(objective_function,
                config_space,
                max_runs=max_runs,
                surrogate_model='prob_rf')
        bo.run()
        print('BO result')
        print(bo.get_incumbent())
        perf = bo.history_container.incumbent_value
        runs = [bo.configurations, bo.perfs]
    elif mode == 'lite_bo':
        from litebo.facade.bo_facade import BayesianOptimization
        bo = BayesianOptimization(objective_function,
                                  config_space,
                                  max_runs=max_runs)
        bo.run()
        print('LiteBO result')
        print(bo.get_incumbent())
        perf = bo.history_container.incumbent_value
        runs = [bo.configurations, bo.perfs]
    elif mode.startswith('tlbo'):
        _, gp_fusion = mode.split('_')
        meta_feature_vec = metafeature_dict[dataset]
        past_datasets = test_datasets.copy()
        if dataset in past_datasets:
            past_datasets.remove(dataset)
        past_history = load_runhistory(past_datasets)

        gp_models = [
            gp_models_dict[dataset_name] for dataset_name in past_datasets
        ]
        tlbo = TLBO(objective_function,
                    config_space,
                    past_history,
                    gp_models=gp_models,
                    dataset_metafeature=meta_feature_vec,
                    max_runs=max_runs,
                    gp_fusion=gp_fusion)
        tlbo.run()
        print('TLBO result')
        print(tlbo.get_incumbent())
        runs = [tlbo.configurations, tlbo.perfs]
        perf = tlbo.history_container.incumbent_value
    else:
        raise ValueError('Invalid mode.')
    file_saved = '%s_%s_%s_result_%d_%d_%s.pkl' % (mode, algo_name, dataset,
                                                   max_runs, run_id, benchmark)
    with open(data_dir + file_saved, 'wb') as f:
        pk.dump([perf, runs], f)
Example #8
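# Optimize the 2-D Branin function four ways: sequential BO, random search, and
# batch BO with median-imputation and local-penalization sampling.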
def test_branin():
    space_dict = {
        "parameters": {
            "x1": {
                "type": "float",
                "bound": [-5, 10],
                "default": 0
            },
            "x2": {
                "type": "float",
                "bound": [0, 15]
            },
        }
    }

    cs = get_config_space_from_dict(space_dict)
    print(cs)

    bo = BayesianOptimization(branin,
                              cs,
                              max_runs=30,
                              time_limit_per_trial=3,
                              logging_dir='logs')
    bo.run()
    inc_value = bo.get_incumbent()
    print('BO', '=' * 30)
    print(inc_value)

    # Evaluate the random search.
    bo = BayesianOptimization(branin,
                              cs,
                              max_runs=30,
                              time_limit_per_trial=3,
                              sample_strategy='random',
                              logging_dir='logs')
    bo.run()
    inc_value = bo.get_incumbent()
    print('RANDOM', '=' * 30)
    print(inc_value)

    # Evaluate batch BO.
    bo = BatchBayesianOptimization(branin,
                                   cs,
                                   max_runs=10,
                                   batch_size=3,
                                   time_limit_per_trial=3,
                                   sample_strategy='median_imputation',
                                   logging_dir='logs')
    bo.run()
    inc_value = bo.get_incumbent()
    print('MEDIAN IMPUTATION BATCH BO', '=' * 30)
    print(inc_value)

    # Evaluate batch BO.
    bo = BatchBayesianOptimization(branin,
                                   cs,
                                   max_runs=10,
                                   batch_size=3,
                                   time_limit_per_trial=3,
                                   sample_strategy='local_penalization',
                                   logging_dir='logs')
    bo.run()
    inc_value = bo.get_incumbent()
    print('LOCAL PENALIZATION BATCH BO', '=' * 30)
    print(inc_value)
Example #9
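# Script form of Example #8: build the Branin space from a dict, then run
# sequential BO and random search for 90 trials each.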
            "default": 0
        },
        "x2": {
            "type": "float",
            "bound": [0, 15]
        },
    }
}

from litebo.utils.config_space.space_utils import get_config_space_from_dict
cs = get_config_space_from_dict(space_dict)
print(cs)

bo = BayesianOptimization(branin,
                          cs,
                          max_runs=90,
                          time_limit_per_trial=3,
                          logging_dir='logs')
bo.run()
inc_value = bo.get_incumbent()
print('BO', '=' * 30)
print(inc_value)

# Evaluate the random search.
bo = BayesianOptimization(branin,
                          cs,
                          max_runs=90,
                          time_limit_per_trial=3,
                          sample_strategy='random',
                          logging_dir='logs')
bo.run()
Example #10
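# Benchmark HPO methods (GP-BO, RF-BO, lite-BO, SMAC, TPE, random search) on a
# holdout split; the objective returns negated balanced accuracy, so lower is better.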
def evaluate(mth, dataset, run_id):
    print(mth, dataset, run_id)
    train_data, test_data = load_train_test_data(dataset,
                                                 test_size=0.3,
                                                 task_type=MULTICLASS_CLS)

    def objective_function(config):
        metric = get_metric('bal_acc')
        _, estimator = get_estimator(config.get_dictionary())
        X_train, y_train = train_data.data
        X_test, y_test = test_data.data
        estimator.fit(X_train, y_train)
        return -metric(estimator, X_test, y_test)

    def tpe_objective_function(config):
        metric = get_metric('bal_acc')
        _, estimator = get_estimator(config)
        X_train, y_train = train_data.data
        X_test, y_test = test_data.data
        estimator.fit(X_train, y_train)
        return -metric(estimator, X_test, y_test)

    config_space = get_configspace()

    if mth == 'gp_bo':
        bo = BO(objective_function, config_space, max_runs=max_runs)
        bo.run()
        print('new BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'rf_bo':
        bo = BO(objective_function,
                config_space,
                surrogate_model='prob_rf',
                max_runs=max_runs)
        bo.run()
        print('new BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'lite_bo':
        from litebo.facade.bo_facade import BayesianOptimization
        bo = BayesianOptimization(objective_function,
                                  config_space,
                                  max_runs=max_runs)
        bo.run()
        print('lite BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'smac':
        from smac.scenario.scenario import Scenario
        from smac.facade.smac_facade import SMAC
        # Scenario object
        scenario = Scenario({
            "run_obj": "quality",
            "runcount-limit": max_runs,
            "cs": config_space,
            "deterministic": "true"
        })
        smac = SMAC(scenario=scenario,
                    rng=np.random.RandomState(42),
                    tae_runner=objective_function)
        incumbent = smac.optimize()
        perf_bo = objective_function(incumbent)
        print('SMAC BO result')
        print(perf_bo)
    elif mth == 'tpe':
        config_space = get_configspace('tpe')
        from hyperopt import tpe, fmin, Trials
        trials = Trials()
        fmin(tpe_objective_function,
             config_space,
             tpe.suggest,
             max_runs,
             trials=trials)
        perfs = [trial['result']['loss'] for trial in trials.trials]
        perf_bo = min(perfs)
    elif mth == 'tpe_bo':
        from mindware.components.transfer_learning.tlbo.tpe_optimizer import TPE_BO
        bo = TPE_BO(objective_function, config_space, max_runs=max_runs)
        bo.run()
        print('TPE BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'random_search':
        from mindware.components.transfer_learning.tlbo.tpe_optimizer import TPE_BO
        bo = TPE_BO(objective_function,
                    config_space,
                    surrogate_model=mth,
                    max_runs=max_runs)
        bo.run()
        print('random search result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    else:
        raise ValueError('Invalid method.')
    return perf_bo
Example #11
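# Full feature-engineering optimizer: BO searches a hierarchical space of
# preprocessor, balancer, rescaler, generator, and selector choices, and the
# best configurations are re-parsed into transformed data nodes.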
class BayesianOptimizationOptimizer(Optimizer):
    def __init__(self, task_type, input_data: DataNode, evaluator: _BaseEvaluator,
                 model_id: str, time_limit_per_trans: int,
                 mem_limit_per_trans: int,
                 seed: int, n_jobs=1,
                 number_of_unit_resource=1,
                 time_budget=600, algo='smac'):
        super().__init__(str(__class__.__name__), task_type, input_data, seed)
        self.number_of_unit_resource = number_of_unit_resource
        self.iter_num_per_unit_resource = 10
        self.time_limit_per_trans = time_limit_per_trans
        self.mem_limit_per_trans = mem_limit_per_trans
        self.time_budget = time_budget
        self.evaluator = evaluator
        self.model_id = model_id

        self.incumbent_score = -np.inf
        self.fetch_incumbent = None
        self.baseline_score = -np.inf
        self.start_time = time.time()
        self.hp_config = None
        self.seed = seed
        self.n_jobs = n_jobs

        self.node_dict = dict()

        self.early_stopped_flag = False
        self.is_finished = False
        self.iteration_id = 0

        self.evaluator.parse_needed = True
        # Prepare the hyperparameter space.
        self.hyperparameter_space = self._get_task_hyperparameter_space(optimizer=algo)
        if algo == 'smac':
            self.incumbent_config = self.hyperparameter_space.get_default_configuration()
        else:
            self.incumbent_config = None
        self.optimizer = BO(objective_function=self.evaluate_function,
                            config_space=self.hyperparameter_space,
                            max_runs=int(1e10),
                            task_id=self.model_id,
                            time_limit_per_trial=self.time_limit_per_trans,
                            rng=np.random.RandomState(self.seed))
        self.eval_dict = {}

    def evaluate_function(self, config, name='fe', data_subsample_ratio=1.0):
        """
            The config is the configuration that specifies the FE pipeline.
        :param name:
        :param data_subsample_ratio:
        :param config:
        :return: the evaluation score.
        """
        input_node = self.root_node
        output_node = self._parse(input_node, config)
        output_node.config = config
        return self.evaluator(self.hp_config, data_node=output_node, name='fe',
                              data_subsample_ratio=data_subsample_ratio)

    def optimize(self):
        """
            Interface enables user to use this FE optimizer only.
        :return:
        """
        self.is_finished = False
        while not self.is_finished:
            self.logger.debug('=' * 50)
            self.logger.debug('Start the ITERATION: %d' % self.iteration_id)
            self.logger.debug('=' * 50)
            self.iterate()
            if self.start_time + self.time_budget < time.time():
                self.is_finished = True
        return self.incumbent

    def iterate(self):
        result = None
        for _ in range(self.number_of_unit_resource):
            result = self._iterate()
        return result

    def _iterate(self):
        _start_time = time.time()
        for _ in range(self.iter_num_per_unit_resource):
            _, status, _, info = self.optimizer.iterate()
            if status == 1:
                print(info)

        runhistory = self.optimizer.get_history()
        self.eval_dict = {(fe_config, self.evaluator.hpo_config): -score for fe_config, score in
                          runhistory.data.items()}
        self.incumbent_config, iter_incumbent_score = runhistory.get_incumbents()[0]
        iter_incumbent_score = -iter_incumbent_score
        iteration_cost = time.time() - _start_time
        if iter_incumbent_score > self.incumbent_score:
            self.incumbent_score = iter_incumbent_score
            self.incumbent = self._parse(self.root_node, self.incumbent_config)

        # incumbent_score: the larger, the better
        return self.incumbent_score, iteration_cost, self.incumbent

    @ignore_warnings([ConvergenceWarning, UserWarning, RuntimeWarning])
    def _parse(self, data_node: DataNode, config, record=False, skip_balance=False):
        """
            Transform the data node based on the pipeline specified by configuration.
        :param data_node:
        :param config:
        :param record:
        :return: the resulting data node.
        """
        # Remove the indicator in config_dict.
        config_dict = config.get_dictionary().copy()

        pre1_id = config_dict['preprocessor1']
        config_dict.pop('preprocessor1')
        pre2_id = config_dict['preprocessor2']
        config_dict.pop('preprocessor2')
        if skip_balance:
            bal_id = 'empty'
        else:
            if 'balancer' in config_dict:
                bal_id = config_dict['balancer']
                config_dict.pop('balancer')
            else:
                bal_id = 'empty'
        gen_id = config_dict['generator']
        config_dict.pop('generator')
        res_id = config_dict['rescaler']
        config_dict.pop('rescaler')
        sel_id = config_dict['selector']
        config_dict.pop('selector')

        def tran_operate(id, tran_set, config, node):
            if id != "empty":
                _config = {}
                for key in config:
                    if id in key:
                        config_name = key.split(':')[1]
                        _config[config_name] = config[key]
                tran = tran_set[id](**_config)
                output_node = tran.operate(node)
                return output_node, tran
            return node, None

        _node = data_node.copy_()
        # Preprocessor1
        _node, pre1_tran = tran_operate(pre1_id, _preprocessor1, config_dict, _node)

        # Preprocessor2
        _node, pre2_tran = tran_operate(pre2_id, _preprocessor2, config_dict, _node)

        # Balancer
        _balancer = _bal_balancer if self.if_bal else _imb_balancer
        _node, bal_tran = tran_operate(bal_id, _balancer, config_dict, _node)

        # Rescaler
        _node, res_tran = tran_operate(res_id, _rescaler, config_dict, _node)

        # Generator
        _node, gen_tran = tran_operate(gen_id, _generator, config_dict, _node)

        # Selector
        _node, sel_tran = tran_operate(sel_id, _selector, config_dict, _node)

        _node.config = config
        if record:
            return _node, [pre1_tran, pre2_tran, bal_tran, res_tran, gen_tran, sel_tran]
        return _node

    def _get_task_hyperparameter_space(self, optimizer='tpe'):
        """
            Fetch the underlying hyperparameter space for feature engineering.
            Pipeline Space:
                1. preprocess: continous_discretizer
                2. preprocess: discrete_categorizer,
                3. balancer: weight_balancer,
                             data_balancer.
                4. scaler: normalizer, scaler, quantile.
                5. generator: all generators.
                6. selector: all selectors.
        :return: hyper space.
        """
        if self.task_type in CLS_TASKS:
            self.trans_types = TRANS_CANDIDATES['classification'].copy()
        else:
            self.trans_types = TRANS_CANDIDATES['regression'].copy()

        # Avoid transformations that would take too long, e.g.,
        # combinations of non-linear models with feature learning.
        # feature_learning = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]
        if self.task_type in CLS_TASKS:
            classifier_set = ["adaboost", "decision_tree", "extra_trees",
                              "gradient_boosting", "k_nearest_neighbors",
                              "libsvm_svc", "random_forest", "gaussian_nb",
                              "decision_tree", "lightgbm"]

            if self.model_id in classifier_set:
                for tran_id in [12, 13, 15]:
                    if tran_id in self.trans_types:
                        self.trans_types.remove(tran_id)

        generator_dict = self._get_configuration_space(_generator, self.trans_types, optimizer=optimizer)
        rescaler_dict = self._get_configuration_space(_rescaler, self.trans_types, optimizer=optimizer)
        selector_dict = self._get_configuration_space(_selector, self.trans_types, optimizer=optimizer)
        preprocessor1_dict = self._get_configuration_space(_preprocessor1, optimizer=optimizer)
        preprocessor2_dict = self._get_configuration_space(_preprocessor2, optimizer=optimizer)
        if self.task_type in CLS_TASKS:
            _balancer = _bal_balancer if self.if_bal else _imb_balancer
            balancer_dict = self._get_configuration_space(_balancer, optimizer=optimizer)
        else:
            balancer_dict = None
        cs = self._build_hierachical_configspace(preprocessor1_dict, preprocessor2_dict,
                                                 generator_dict, rescaler_dict,
                                                 selector_dict, balancer_dict, optimizer=optimizer)
        return cs

    def _get_configuration_space(self, builtin_transformers, trans_type=None, optimizer='smac'):
        config_dict = dict()
        for tran_key in builtin_transformers:
            tran = builtin_transformers[tran_key]
            tran_id = tran().type
            if trans_type is None or tran_id in trans_type:
                try:
                    sub_configuration_space = builtin_transformers[tran_key].get_hyperparameter_search_space(
                        optimizer=optimizer)
                    config_dict[tran_key] = sub_configuration_space
                except Exception:
                    if optimizer == 'smac':
                        config_dict[tran_key] = ConfigurationSpace()
                    elif optimizer == 'tpe':
                        config_dict[tran_key] = {}
        return config_dict

    def _build_hierachical_configspace(self, pre_config1, pre_config2, gen_config, res_config, sel_config,
                                       bal_config=None, optimizer='smac'):
        if optimizer == 'smac':
            cs = ConfigurationSpace()
            self._add_hierachical_configspace(cs, pre_config1, "preprocessor1")
            self._add_hierachical_configspace(cs, pre_config2, "preprocessor2")
            if bal_config is not None:
                self._add_hierachical_configspace(cs, bal_config, "balancer")
            self._add_hierachical_configspace(cs, gen_config, "generator")
            self._add_hierachical_configspace(cs, res_config, "rescaler")
            self._add_hierachical_configspace(cs, sel_config, "selector")
            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            space = {}

            def dict2hi(dictionary):
                hi_list = list()
                for key in dictionary:
                    hi_list.append((key, dictionary[key]))
                return hi_list

            space['preprocessor1'] = hp.choice('preprocessor1', dict2hi(pre_config1))
            space['preprocessor2'] = hp.choice('preprocessor2', dict2hi(pre_config2))
            space['generator'] = hp.choice('generator', dict2hi(gen_config))
            if bal_config is not None:
                space['balancer'] = hp.choice('balancer', dict2hi(bal_config))
            space['rescaler'] = hp.choice('rescaler', dict2hi(res_config))
            space['selector'] = hp.choice('selector', dict2hi(sel_config))
            return space

    def _add_hierachical_configspace(self, cs, config, parent_name):
        config_cand = list(config.keys())
        config_cand.append("empty")
        config_option = CategoricalHyperparameter(parent_name, config_cand,
                                                  default_value=config_cand[-1])
        cs.add_hyperparameter(config_option)
        for config_item in config_cand:
            if config_item == 'empty':
                sub_configuration_space = ConfigurationSpace()
            else:
                sub_configuration_space = config[config_item]
            parent_hyperparameter = {'parent': config_option,
                                     'value': config_item}
            cs.add_configuration_space(config_item, sub_configuration_space,
                                       parent_hyperparameter=parent_hyperparameter)

    def fetch_nodes(self, n=5):
        runhistory = self.optimizer.get_history()
        hist_dict = runhistory.data
        default_config = self.hyperparameter_space.get_default_configuration()

        if len(hist_dict) > 0:
            min_list = sorted(hist_dict.items(), key=lambda item: item[1])
        else:
            min_list = [(default_config, None)]

        if len(min_list) < 50:
            min_n = list(min_list[:n])
        else:
            amplification_rate = 3
            chosen_idxs = np.arange(n) * amplification_rate
            min_n = [min_list[idx] for idx in chosen_idxs]

        # Filter out configurations that perform no better than the default.
        if len(hist_dict) > 0:
            default_perf = hist_dict[default_config]
            min_n = list(filter(lambda x: x[1] < default_perf, min_n))

        if default_config not in [x[0] for x in min_n]:
            # Get default configuration.
            min_n.append((default_config, runhistory.data[default_config]))

        if self.incumbent_config not in [x[0] for x in min_n]:
            min_n.append((self.incumbent_config, runhistory.data[self.incumbent_config]))

        node_list = []
        for i, config in enumerate(min_n):
            if config[0] in self.node_dict:
                node_list.append(self.node_dict[config[0]][0])
                continue

            try:
                node, tran_list = self._parse(self.root_node, config[0], record=True)
                node.config = config[0]
                if node.data[0].shape[1] == 0:
                    continue
                if self.fetch_incumbent is None:
                    self.fetch_incumbent = node  # Update incumbent node
                node_list.append(node)
                self.node_dict[config[0]] = [node, tran_list]
            except Exception:
                print("Re-parse failed on config %s" % str(config[0]))
        return node_list

    def fetch_nodes_by_config(self, config_list):
        node_list = []
        self.logger.info("Re-parse %d configs" % len(config_list))
        for i, config in enumerate(config_list):
            try:
                if config is None:
                    config = self.hyperparameter_space.get_default_configuration()

                if config in self.node_dict:
                    node_list.append(self.node_dict[config][0])
                    continue

                node, tran_list = self._parse(self.root_node, config, record=True)
                node.config = config
                if node.data[0].shape[1] == 0:
                    continue
                if self.fetch_incumbent is None:
                    # Ensure the config_list is sorted by performance
                    self.fetch_incumbent = node  # Update incumbent node
                node_list.append(node)
                self.node_dict[config] = [node, tran_list]
                assert None not in self.node_dict
            except Exception as e:
                node_list.append(None)
                print("Re-parse failed on config %s" % str(config))
        return node_list

    def apply(self, data_node: DataNode, ref_node: DataNode, phase='test'):
        if ref_node is None:
            return data_node
        input_node = data_node.copy_()
        fe_config = ref_node.config
        if fe_config in self.node_dict:
            fe_trans_list = self.node_dict[fe_config][1]
            for i, tran in enumerate(fe_trans_list):
                if phase == 'test' and i == 2:  # Disable balancer
                    continue
                if tran is not None:
                    input_node = tran.operate(input_node)
        else:
            self.logger.info("Ref node config:")
            self.logger.info(str(fe_config))
            self.logger.info("Node history in optimizer:")
            self.logger.info(str(self.node_dict))
            raise ValueError("Ref node not in history!")
        return input_node
Example #12
# SMAC-style HPO wrapper: iterate() runs a few BO trials per call and updates the
# incumbent from the run history; run() loops until the evaluation or time budget is spent.
class SMACOptimizer(BaseHPOptimizer):
    def __init__(self, evaluator, config_space, time_limit=None, evaluation_limit=None,
                 per_run_time_limit=300, per_run_mem_limit=1024, output_dir='./',
                 trials_per_iter=1, seed=1, n_jobs=1):
        super().__init__(evaluator, config_space, seed)
        self.time_limit = time_limit
        self.evaluation_num_limit = evaluation_limit
        self.trials_per_iter = trials_per_iter
        self.per_run_time_limit = per_run_time_limit
        self.per_run_mem_limit = per_run_mem_limit
        self.output_dir = output_dir

        self.optimizer = BO(objective_function=self.evaluator,
                            config_space=config_space,
                            max_runs=int(1e10),
                            task_id=None,
                            time_limit_per_trial=self.per_run_time_limit,
                            rng=np.random.RandomState(self.seed))

        self.trial_cnt = 0
        self.configs = list()
        self.perfs = list()
        self.incumbent_perf = float("-INF")
        self.incumbent_config = self.config_space.get_default_configuration()
        # Estimate the size of the hyperparameter space.
        hp_num = len(self.config_space.get_hyperparameters())
        if hp_num == 0:
            self.config_num_threshold = 0
        else:
            _threshold = int(len(set(self.config_space.sample_configuration(10000))) * 0.75)
            self.config_num_threshold = _threshold
        self.logger.debug('The maximum trial number in HPO is: %d' % self.config_num_threshold)
        self.maximum_config_num = min(600, self.config_num_threshold)
        self.early_stopped_flag = False
        self.eval_dict = {}

    def run(self):
        while True:
            evaluation_num = len(self.perfs)
            if self.evaluation_num_limit is not None and evaluation_num > self.evaluation_num_limit:
                break
            if self.time_limit is not None and time.time() - self.start_time > self.time_limit:
                break
            self.iterate()
        return np.max(self.perfs)

    def iterate(self):
        _start_time = time.time()
        for _ in range(self.trials_per_iter):
            if len(self.configs) >= self.maximum_config_num:
                self.early_stopped_flag = True
                self.logger.warning('Already explored 75 percent of the hyperparameter '
                                    'space or reached the maximum configuration number: %d!' % self.maximum_config_num)
                break
            _config, _status, _perf, _ = self.optimizer.iterate()
            if _status == SUCCESS:
                self.configs.append(_config)
                self.perfs.append(-_perf)

        runhistory = self.optimizer.get_history()
        self.eval_dict = {(None, hpo_config): -score for hpo_config, score in
                          runhistory.data.items()}
        self.incumbent_config, self.incumbent_perf = runhistory.get_incumbents()[0]
        self.incumbent_perf = -self.incumbent_perf
        iteration_cost = time.time() - _start_time
        # incumbent_perf: the larger, the better
        return self.incumbent_perf, iteration_cost, self.incumbent_config
Example #13
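# Tail of the classic SVM configuration-space example: gamma is made conditional
# on the kernel choice, and the SVM objective is then tuned with 30 BO trials.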
    "gamma", ["auto", "value"],
    default_value="auto")  # only rbf, poly, sigmoid
gamma_value = UniformFloatHyperparameter("gamma_value",
                                         0.0001,
                                         8,
                                         default_value=1)
cs.add_hyperparameters([gamma, gamma_value])
# We only activate gamma_value if gamma is set to "value"
cs.add_condition(InCondition(child=gamma_value, parent=gamma,
                             values=["value"]))
# And again we can restrict the use of gamma in general to the choice of the kernel
cs.add_condition(
    InCondition(child=gamma, parent=kernel, values=["rbf", "poly", "sigmoid"]))

# Example call of the function
# It returns: Status, Cost, Runtime, Additional Infos
def_value = svm_from_cfg(cs.get_default_configuration())
print("Default Value: %.2f" % (def_value))

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
bo = BayesianOptimization(svm_from_cfg,
                          cs,
                          max_runs=30,
                          time_limit_per_trial=30,
                          logging_dir='logs')
bo.run()
inc_value = bo.get_incumbent()

print(inc_value)
Example #14
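# HPO wrapper similar to Example #12, but without a per-trial time limit; here
# incumbent performance is reported as 1 - error.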
class SMACOptimizer(BaseHPOptimizer):
    def __init__(self,
                 evaluator,
                 config_space,
                 time_limit=None,
                 evaluation_limit=None,
                 per_run_time_limit=600,
                 per_run_mem_limit=1024,
                 output_dir='./',
                 trials_per_iter=1,
                 seed=1,
                 n_jobs=1):
        super().__init__(evaluator, config_space, seed)
        self.time_limit = time_limit
        self.evaluation_num_limit = evaluation_limit
        self.trials_per_iter = trials_per_iter
        self.per_run_time_limit = per_run_time_limit
        self.per_run_mem_limit = per_run_mem_limit
        self.output_dir = output_dir

        self.optimizer = BO(objective_function=self.evaluator,
                            config_space=config_space,
                            max_runs=int(1e10),
                            task_id=None,
                            rng=np.random.RandomState(self.seed))

        self.trial_cnt = 0
        self.configs = list()
        self.perfs = list()
        self.incumbent_perf = float("-INF")
        self.incumbent_config = self.config_space.get_default_configuration()
        # Estimate the size of the hyperparameter space.
        hp_num = len(self.config_space.get_hyperparameters())
        if hp_num == 0:
            self.config_num_threshold = 0
        else:
            _threshold = int(
                len(set(self.config_space.sample_configuration(10000))) * 0.75)
            self.config_num_threshold = _threshold
        self.logger.debug('HP_THRESHOLD is: %d' % self.config_num_threshold)
        self.maximum_config_num = min(600, self.config_num_threshold)
        self.early_stopped_flag = False

    def run(self):
        while True:
            evaluation_num = len(self.perfs)
            if self.evaluation_num_limit is not None and evaluation_num > self.evaluation_num_limit:
                break
            if self.time_limit is not None and time.time() - self.start_time > self.time_limit:
                break
            self.iterate()
        return np.max(self.perfs)

    def iterate(self):
        _start_time = time.time()
        for _ in range(self.trials_per_iter):
            if len(self.configs) >= self.maximum_config_num:
                self.early_stopped_flag = True
                self.logger.warning(
                    'Already explored 75 percent of the hyperparameter space '
                    'or reached the maximum configuration number: %d!' %
                    self.maximum_config_num)
                break
            self.optimizer.iterate()

        runhistory = self.optimizer.get_history()
        self.incumbent_config, self.incumbent_perf = runhistory.get_incumbents()[-1]
        self.incumbent_perf = 1 - self.incumbent_perf
        iteration_cost = time.time() - _start_time
        return self.incumbent_perf, iteration_cost, self.incumbent_config

    def optimize(self):
        self.optimizer.optimize()

        runhistory = self.optimizer.get_history()
        self.incumbent_config, self.incumbent_perf = runhistory.get_incumbents()[-1]
        self.incumbent_perf = 1 - self.incumbent_perf
        return self.incumbent_config, self.incumbent_perf
Example #15
# Evaluate feature-engineering optimizers (GP-BO, lite-BO, SMAC) over the
# hierarchical space exported by BayesianOptimizationOptimizer.
def evaluate(mth, dataset, run_id):
    print(mth, dataset, run_id)
    train_data, test_data = load_train_test_data(dataset,
                                                 test_size=0.3,
                                                 task_type=MULTICLASS_CLS)

    cs = _classifiers[algo_name].get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", algo_name)
    cs.add_hyperparameter(model)
    default_hpo_config = cs.get_default_configuration()
    metric = get_metric('bal_acc')

    fe_evaluator = ClassificationEvaluator(default_hpo_config,
                                           scorer=metric,
                                           name='fe',
                                           resampling_strategy='holdout',
                                           seed=1)
    fe_optimizer = BayesianOptimizationOptimizer(task_type=MULTICLASS_CLS,
                                                 input_data=train_data,
                                                 evaluator=fe_evaluator,
                                                 model_id=algo_name,
                                                 time_limit_per_trans=600,
                                                 mem_limit_per_trans=5120,
                                                 number_of_unit_resource=10,
                                                 seed=1)
    config_space = fe_optimizer.hyperparameter_space

    def objective_function(config):
        return fe_optimizer.evaluate_function(config)

    if mth == 'gp_bo':
        bo = BO(objective_function, config_space, max_runs=max_runs)
        bo.run()
        print('new BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'lite_bo':
        from litebo.facade.bo_facade import BayesianOptimization
        bo = BayesianOptimization(objective_function,
                                  config_space,
                                  max_runs=max_runs)
        bo.run()
        print('lite BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'smac':
        from smac.scenario.scenario import Scenario
        from smac.facade.smac_facade import SMAC
        # Scenario object
        scenario = Scenario({
            "run_obj": "quality",
            "runcount-limit": max_runs,
            "cs": config_space,
            "deterministic": "true"
        })
        smac = SMAC(scenario=scenario,
                    rng=np.random.RandomState(42),
                    tae_runner=objective_function)
        incumbent = smac.optimize()
        perf_bo = objective_function(incumbent)
        print('SMAC BO result')
        print(perf_bo)
    else:
        raise ValueError('Invalid method.')
    return perf_bo
Example #16
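# HPO wrapper with a per-call inner iteration budget and a wall-clock budget;
# eval_dict records each evaluated configuration with its score and a timestamp.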
class SMACOptimizer(BaseOptimizer):
    def __init__(self, evaluator, config_space, name, time_limit=None, evaluation_limit=None,
                 per_run_time_limit=300, per_run_mem_limit=1024, output_dir='./',
                 inner_iter_num_per_iter=1, seed=1, n_jobs=1):
        super().__init__(evaluator, config_space, name, seed)
        self.time_limit = time_limit
        self.evaluation_num_limit = evaluation_limit
        self.inner_iter_num_per_iter = inner_iter_num_per_iter
        self.per_run_time_limit = per_run_time_limit
        self.per_run_mem_limit = per_run_mem_limit
        self.output_dir = output_dir
        self.optimizer = BO(objective_function=self.evaluator,
                            config_space=config_space,
                            max_runs=int(1e10),
                            task_id=None,
                            time_limit_per_trial=self.per_run_time_limit,
                            rng=np.random.RandomState(self.seed))

        self.trial_cnt = 0
        self.configs = list()
        self.perfs = list()
        self.exp_output = dict()
        self.incumbent_perf = float("-INF")
        self.incumbent_config = self.config_space.get_default_configuration()
        # Estimate the size of the hyperparameter space.
        hp_num = len(self.config_space.get_hyperparameters())
        if hp_num == 0:
            self.config_num_threshold = 0
        else:
            _threshold = int(len(set(self.config_space.sample_configuration(5000))))
            self.config_num_threshold = _threshold
        self.logger.debug('The maximum trial number in HPO is: %d' % self.config_num_threshold)
        self.maximum_config_num = min(600, self.config_num_threshold)
        self.eval_dict = {}

    def run(self):
        while True:
            evaluation_num = len(self.perfs)
            if self.evaluation_num_limit is not None and evaluation_num > self.evaluation_num_limit:
                break
            if self.time_limit is not None and time.time() - self.start_time > self.time_limit:
                break
            self.iterate()
        return np.max(self.perfs)

    def iterate(self, budget=MAX_INT):
        _start_time = time.time()

        if len(self.configs) == 0 and self.init_hpo_iter_num is not None:
            inner_iter_num = self.init_hpo_iter_num
            print('initial hpo trial num is set to %d' % inner_iter_num)
        else:
            inner_iter_num = self.inner_iter_num_per_iter

        for _ in range(inner_iter_num):
            if len(self.configs) >= self.maximum_config_num:
                self.early_stopped_flag = True
                self.logger.warning('Already explored the sampled hyperparameter space '
                                    'or reached the maximum configuration number: %d!' % self.maximum_config_num)
                break
            if time.time() - _start_time > budget:
                self.logger.warning('Time limit exceeded!')
                break
            _config, _status, _perf, _ = self.optimizer.iterate()
            if _status == SUCCESS:
                self.exp_output[time.time()] = (_config, _perf)
                self.configs.append(_config)
                self.perfs.append(-_perf)
                self.combine_tmp_config_path()

        runhistory = self.optimizer.get_history()
        if self.name == 'hpo':
            if hasattr(self.evaluator, 'fe_config'):
                fe_config = self.evaluator.fe_config
            else:
                fe_config = None
            self.eval_dict = {(fe_config, hpo_config): [-score, time.time()] for hpo_config, score in
                              runhistory.data.items()}
        else:
            if hasattr(self.evaluator, 'hpo_config'):
                hpo_config = self.evaluator.hpo_config
            else:
                hpo_config = None
            self.eval_dict = {(fe_config, hpo_config): [-score, time.time()] for fe_config, score in
                              runhistory.data.items()}

        if len(runhistory.get_incumbents()) > 0:
            self.incumbent_config, self.incumbent_perf = runhistory.get_incumbents()[0]
            self.incumbent_perf = -self.incumbent_perf
        iteration_cost = time.time() - _start_time
        # incumbent_perf: the larger, the better
        return self.incumbent_perf, iteration_cost, self.incumbent_config