예제 #1
0
    def __init__(self,
                 objective_function,
                 config_space,
                 sample_strategy='bo',
                 time_limit_per_trial=180,
                 max_runs=200,
                 logging_dir='logs',
                 initial_configurations=None,
                 initial_batch=1,
                 batch_size=3,
                 task_id=None,
                 rng=None):
        super().__init__(config_space, task_id, output_dir=logging_dir)
        self.logger = super()._get_logger(self.__class__.__name__)
        if rng is None:
            run_id, rng = get_rng()

        self.batch_size = batch_size
        self.init_batch = initial_batch
        self.max_iterations = max_runs
        self.iteration_id = 0
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.time_limit_per_trial = time_limit_per_trial
        self.default_obj_value = MAXINT
        self.sample_strategy = sample_strategy

        self.configurations = list()
        self.failed_configurations = list()
        self.perfs = list()

        # Initialize the basic component in BO.
        self.config_space.seed(rng.randint(MAXINT))
        self.objective_function = objective_function
        types, bounds = get_types(config_space)
        # TODO: what is the feature array.
        self.model = RandomForestWithInstances(types=types,
                                               bounds=bounds,
                                               seed=rng.randint(MAXINT))
        if self.sample_strategy == 'local_penalization':
            self.acquisition_function = LPEI(self.model)
        else:
            self.acquisition_function = EI(self.model)
        self.optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=np.random.RandomState(seed=rng.randint(MAXINT)),
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations)
        self._random_search = RandomSearch(self.acquisition_function,
                                           self.config_space, rng)
        self.random_configuration_chooser = ChooserProb(prob=0.25, rng=rng)
예제 #2
0
파일: base.py 프로젝트: jhj0411jhj/open-box
def build_surrogate(func_str='prf',
                    config_space=None,
                    rng=None,
                    history_hpo_data=None):
    assert config_space is not None
    func_str = func_str.lower()
    types, bounds = get_types(config_space)
    seed = rng.randint(MAXINT)
    if func_str == 'prf':
        return RandomForestWithInstances(types=types, bounds=bounds, seed=seed)

    if func_str == 'lightgbm':
        return LightGBM(config_space, types=types, bounds=bounds, seed=seed)

    elif func_str.startswith('gp'):
        return create_gp_model(model_type=func_str,
                               config_space=config_space,
                               types=types,
                               bounds=bounds,
                               rng=rng)
    elif func_str.startswith('mfgpe'):
        from openbox.surrogate.tlbo.mfgpe import MFGPE
        inner_surrogate_type = 'prf'
        return MFGPE(config_space,
                     history_hpo_data,
                     seed,
                     surrogate_type=inner_surrogate_type,
                     num_src_hpo_trial=-1)
    elif func_str.startswith('tlbo'):
        print('the current surrogate is', func_str)
        if 'rgpe' in func_str:
            from openbox.surrogate.tlbo.rgpe import RGPE
            inner_surrogate_type = func_str.split('_')[-1]
            return RGPE(config_space,
                        history_hpo_data,
                        seed,
                        surrogate_type=inner_surrogate_type,
                        num_src_hpo_trial=-1)
        elif 'sgpr' in func_str:
            from openbox.surrogate.tlbo.stacking_gpr import SGPR
            inner_surrogate_type = func_str.split('_')[-1]
            return SGPR(config_space,
                        history_hpo_data,
                        seed,
                        surrogate_type=inner_surrogate_type,
                        num_src_hpo_trial=-1)
        elif 'topov3' in func_str:
            from openbox.surrogate.tlbo.topo_variant3 import TOPO_V3
            inner_surrogate_type = func_str.split('_')[-1]
            return TOPO_V3(config_space,
                           history_hpo_data,
                           seed,
                           surrogate_type=inner_surrogate_type,
                           num_src_hpo_trial=-1)
        else:
            raise ValueError('Invalid string %s for tlbo surrogate!' %
                             func_str)
    else:
        raise ValueError('Invalid string %s for surrogate!' % func_str)
예제 #3
0
    def __init__(self, types: np.ndarray,
                 bounds: np.ndarray, s_max, eta, weight_list, fusion_method, **kwargs):
        super().__init__(types=types, bounds=bounds, **kwargs)

        self.s_max = s_max
        self.eta = eta
        self.fusion = fusion_method
        self.surrogate_weight = dict()
        self.surrogate_container = dict()
        self.surrogate_r = list()
        self.weight_list = weight_list
        for index, item in enumerate(np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)):
            r = int(item)
            self.surrogate_r.append(r)
            self.surrogate_weight[r] = self.weight_list[index]
            self.surrogate_container[r] = RandomForestWithInstances(types=types, bounds=bounds)
예제 #4
0
class BatchBayesianOptimization(BaseFacade):
    def __init__(self, objective_function, config_space,
                 sample_strategy='bo',
                 time_limit_per_trial=180,
                 max_runs=200,
                 logging_dir='logs',
                 initial_configurations=None,
                 initial_batch=1,
                 batch_size=3,
                 task_id='default_task_id',
                 rng=None):
        super().__init__(config_space, task_id, output_dir=logging_dir)
        self.logger = super()._get_logger(self.__class__.__name__)
        if rng is None:
            run_id, rng = get_rng()

        self.batch_size = batch_size
        self.init_batch = initial_batch
        self.max_iterations = max_runs
        self.iteration_id = 0
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.time_limit_per_trial = time_limit_per_trial
        self.default_obj_value = MAXINT
        self.sample_strategy = sample_strategy

        self.configurations = list()
        self.failed_configurations = list()
        self.perfs = list()

        # Initialize the basic component in BO.
        self.config_space.seed(rng.randint(MAXINT))
        self.objective_function = objective_function
        types, bounds = get_types(config_space)
        # TODO: what is the feature array.
        self.model = RandomForestWithInstances(types=types, bounds=bounds, seed=rng.randint(MAXINT))
        if self.sample_strategy == 'local_penalization':
            self.acquisition_function = LPEI(self.model)
        else:
            self.acquisition_function = EI(self.model)
        self.optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=np.random.RandomState(seed=rng.randint(MAXINT)),
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations
        )
        self._random_search = RandomSearch(
            self.acquisition_function, self.config_space, rng
        )
        self.random_configuration_chooser = ChooserProb(prob=0.25, rng=rng)

    def run(self):
        while self.iteration_id < self.max_iterations:
            self.iterate()

    def iterate(self):
        if len(self.configurations) == 0:
            X = np.array([])
        else:
            failed_configs = list() if self.max_y is None else self.failed_configurations.copy()
            X = convert_configurations_to_array(self.configurations + failed_configs)

        failed_perfs = list() if self.max_y is None else [self.max_y] * len(self.failed_configurations)
        Y = np.array(self.perfs + failed_perfs, dtype=np.float64)

        config_list = self.choose_next(X, Y)
        trial_state_list = list()
        trial_info_list = list()
        perf_list = list()

        for i, config in enumerate(config_list):
            trial_state_list.append(SUCCESS)
            trial_info_list.append(None)
            perf_list.append(None)

            if config not in (self.configurations + self.failed_configurations):
                # Evaluate this configuration.
                try:
                    args, kwargs = (config,), dict()
                    timeout_status, _result = time_limit(self.objective_function, self.time_limit_per_trial,
                                                         args=args, kwargs=kwargs)
                    if timeout_status:
                        raise TimeoutException(
                            'Timeout: time limit for this evaluation is %.1fs' % self.time_limit_per_trial)
                    else:
                        perf_list[i] = _result
                except Exception as e:
                    if isinstance(e, TimeoutException):
                        trial_state_list[i] = TIMEOUT
                    else:
                        traceback.print_exc(file=sys.stdout)
                        trial_state_list[i] = FAILED
                    perf_list[i] = MAXINT
                    trial_info_list[i] = str(e)
                    self.logger.error(trial_info_list[i])

                if trial_state_list[i] == SUCCESS and perf_list[i] < MAXINT:
                    if len(self.configurations) == 0:
                        self.default_obj_value = perf_list[i]

                    self.configurations.append(config)
                    self.perfs.append(perf_list[i])
                    self.history_container.add(config, perf_list[i])

                    self.perc = np.percentile(self.perfs, self.scale_perc)
                    self.min_y = np.min(self.perfs)
                    self.max_y = np.max(self.perfs)
                else:
                    self.failed_configurations.append(config)
            else:
                self.logger.debug('This configuration has been evaluated! Skip it.')
                if config in self.configurations:
                    config_idx = self.configurations.index(config)
                    trial_state_list[i], perf_list[i] = SUCCESS, self.perfs[config_idx]
                else:
                    trial_state_list[i], perf_list[i] = FAILED, MAXINT

        self.iteration_id += 1
        self.logger.info(
            'Iteration-%d, objective improvement: %.4f' % (
                self.iteration_id, max(0, self.default_obj_value - min(perf_list))))
        return config_list, trial_state_list, perf_list, trial_info_list

    def choose_next(self, X: np.ndarray, Y: np.ndarray):
        # Select a batch of configs to evaluate next.
        _config_num = X.shape[0]
        batch_configs_list = list()

        if _config_num < self.init_batch * self.batch_size or self.sample_strategy == 'random':
            for i in range(self.batch_size):
                batch_configs_list.append(self.sample_config())
            return batch_configs_list

        if self.sample_strategy == 'median_imputation':
            estimated_y = np.mean(Y)
            batch_history_container = copy.deepcopy(self.history_container)
            for i in range(self.batch_size):
                self.model.train(X, Y)

                incumbent_value = batch_history_container.get_incumbents()[0][1]

                self.acquisition_function.update(model=self.model, eta=incumbent_value,
                                                 num_data=len(batch_history_container.data))

                challengers = self.optimizer.maximize(
                    runhistory=batch_history_container,
                    num_points=5000,
                    random_configuration_chooser=self.random_configuration_chooser
                )

                is_repeated_config = True
                repeated_time = 0
                curr_batch_config = None
                while is_repeated_config:
                    try:
                        curr_batch_config = challengers.challengers[repeated_time]
                        batch_history_container.add(curr_batch_config, estimated_y)
                    except ValueError:
                        is_repeated_config = True
                        repeated_time += 1
                    else:
                        is_repeated_config = False

                batch_configs_list.append(curr_batch_config)
                X = np.append(X, curr_batch_config.get_array().reshape(1, -1), axis=0)
                Y = np.append(Y, estimated_y)
                estimated_y = np.mean(Y)

        elif self.sample_strategy == 'local_penalization':
            self.model.train(X, Y)
            incumbent_value = self.history_container.get_incumbents()[0][1]
            # L = self.estimate_L(X)
            for i in range(self.batch_size):
                self.acquisition_function.update(model=self.model, eta=incumbent_value,
                                                 num_data=len(self.history_container.data),
                                                 batch_configs=batch_configs_list)

                challengers = self.optimizer.maximize(
                    runhistory=self.history_container,
                    num_points=5000,
                    random_configuration_chooser=self.random_configuration_chooser
                )
                batch_configs_list.append(challengers.challengers[0])
        else:
            raise ValueError('Invalid sampling strategy - %s.' % self.sample_strategy)

        return batch_configs_list

    def sample_config(self):
        config = None
        _sample_cnt, _sample_limit = 0, 10000
        while True:
            _sample_cnt += 1
            config = self.config_space.sample_configuration()
            if config not in (self.configurations + self.failed_configurations):
                break
            if _sample_cnt >= _sample_limit:
                config = self.config_space.sample_configuration()
                break
        return config
예제 #5
0
파일: base.py 프로젝트: PKU-DAIR/open-box
def build_surrogate(func_str='gp',
                    config_space=None,
                    rng=None,
                    history_hpo_data=None):
    assert config_space is not None
    func_str = func_str.lower()
    types, bounds = get_types(config_space)
    seed = rng.randint(MAXINT)
    if func_str == 'prf':
        try:
            from openbox.surrogate.base.rf_with_instances import RandomForestWithInstances
            return RandomForestWithInstances(types=types,
                                             bounds=bounds,
                                             seed=seed)
        except ModuleNotFoundError:
            from openbox.surrogate.base.rf_with_instances_sklearn import skRandomForestWithInstances
            print(
                '[Build Surrogate] Use probabilistic random forest based on scikit-learn. For better performance, '
                'please install pyrfr: '
                'https://open-box.readthedocs.io/en/latest/installation/install_pyrfr.html'
            )
            return skRandomForestWithInstances(types=types,
                                               bounds=bounds,
                                               seed=seed)

    elif func_str == 'sk_prf':
        from openbox.surrogate.base.rf_with_instances_sklearn import skRandomForestWithInstances
        return skRandomForestWithInstances(types=types,
                                           bounds=bounds,
                                           seed=seed)

    elif func_str == 'lightgbm':
        from openbox.surrogate.lightgbm import LightGBM
        return LightGBM(config_space, types=types, bounds=bounds, seed=seed)

    if func_str == 'random_forest':
        from openbox.surrogate.skrf import RandomForestSurrogate
        return RandomForestSurrogate(config_space,
                                     types=types,
                                     bounds=bounds,
                                     seed=seed)

    elif func_str.startswith('gp'):
        from openbox.surrogate.base.build_gp import create_gp_model
        return create_gp_model(model_type=func_str,
                               config_space=config_space,
                               types=types,
                               bounds=bounds,
                               rng=rng)
    elif func_str.startswith('mfgpe'):
        from openbox.surrogate.tlbo.mfgpe import MFGPE
        inner_surrogate_type = 'prf'
        return MFGPE(config_space,
                     history_hpo_data,
                     seed,
                     surrogate_type=inner_surrogate_type,
                     num_src_hpo_trial=-1)
    elif func_str.startswith('tlbo'):
        print('the current surrogate is', func_str)
        if 'rgpe' in func_str:
            from openbox.surrogate.tlbo.rgpe import RGPE
            inner_surrogate_type = func_str.split('_')[-1]
            return RGPE(config_space,
                        history_hpo_data,
                        seed,
                        surrogate_type=inner_surrogate_type,
                        num_src_hpo_trial=-1)
        elif 'sgpr' in func_str:
            from openbox.surrogate.tlbo.stacking_gpr import SGPR
            inner_surrogate_type = func_str.split('_')[-1]
            return SGPR(config_space,
                        history_hpo_data,
                        seed,
                        surrogate_type=inner_surrogate_type,
                        num_src_hpo_trial=-1)
        elif 'topov3' in func_str:
            from openbox.surrogate.tlbo.topo_variant3 import TOPO_V3
            inner_surrogate_type = func_str.split('_')[-1]
            return TOPO_V3(config_space,
                           history_hpo_data,
                           seed,
                           surrogate_type=inner_surrogate_type,
                           num_src_hpo_trial=-1)
        else:
            raise ValueError('Invalid string %s for tlbo surrogate!' %
                             func_str)
    else:
        raise ValueError('Invalid string %s for surrogate!' % func_str)
예제 #6
0
class BayesianOptimization(BaseFacade):
    def __init__(self, objective_function, config_space,
                 sample_strategy='bo',
                 time_limit_per_trial=180,
                 max_runs=200,
                 logging_dir='logs',
                 initial_configurations=None,
                 initial_runs=3,
                 task_id='default_task_id',
                 rng=None):
        super().__init__(config_space, task_id, output_dir=logging_dir)
        self.logger = super()._get_logger(self.__class__.__name__)
        if rng is None:
            run_id, rng = get_rng()

        self.init_num = initial_runs
        self.max_iterations = max_runs
        self.iteration_id = 0
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.time_limit_per_trial = time_limit_per_trial
        self.default_obj_value = MAXINT
        self.sample_strategy = sample_strategy

        self.configurations = list()
        self.failed_configurations = list()
        self.perfs = list()

        # Initialize the basic component in BO.
        self.config_space.seed(rng.randint(MAXINT))
        self.objective_function = objective_function
        types, bounds = get_types(config_space)
        # TODO: what is the feature array.
        self.model = RandomForestWithInstances(types=types, bounds=bounds, seed=rng.randint(MAXINT))
        self.acquisition_function = EI(self.model)
        self.optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=np.random.RandomState(seed=rng.randint(MAXINT)),
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations
        )
        self._random_search = RandomSearch(
            self.acquisition_function, self.config_space, rng
        )
        self.random_configuration_chooser = ChooserProb(prob=0.25, rng=rng)

    def run(self):
        while self.iteration_id < self.max_iterations:
            self.iterate()

    def iterate(self):
        if len(self.configurations) == 0:
            X = np.array([])
        else:
            failed_configs = list() if self.max_y is None else self.failed_configurations.copy()
            X = convert_configurations_to_array(self.configurations + failed_configs)

        failed_perfs = list() if self.max_y is None else [self.max_y] * len(self.failed_configurations)
        Y = np.array(self.perfs + failed_perfs, dtype=np.float64)

        config = self.choose_next(X, Y)

        trial_state = SUCCESS
        trial_info = None

        if config not in (self.configurations + self.failed_configurations):
            # Evaluate this configuration.
            try:
                args, kwargs = (config,), dict()
                timeout_status, _result = time_limit(self.objective_function, self.time_limit_per_trial,
                                                     args=args, kwargs=kwargs)
                if timeout_status:
                    raise TimeoutException(
                        'Timeout: time limit for this evaluation is %.1fs' % self.time_limit_per_trial)
                else:
                    perf = MAXINT if _result is None else _result
            except Exception as e:
                if isinstance(e, TimeoutException):
                    trial_state = TIMEOUT
                else:
                    traceback.print_exc(file=sys.stdout)
                    trial_state = FAILED
                perf = MAXINT
                trial_info = str(e)
                self.logger.error(trial_info)

            if trial_state == SUCCESS and perf < MAXINT:
                if len(self.configurations) == 0:
                    self.default_obj_value = perf

                self.configurations.append(config)
                self.perfs.append(perf)
                self.history_container.add(config, perf)

                self.perc = np.percentile(self.perfs, self.scale_perc)
                self.min_y = np.min(self.perfs)
                self.max_y = np.max(self.perfs)
            else:
                self.failed_configurations.append(config)
        else:
            self.logger.debug('This configuration has been evaluated! Skip it.')
            if config in self.configurations:
                config_idx = self.configurations.index(config)
                trial_state, perf = SUCCESS, self.perfs[config_idx]
            else:
                trial_state, perf = FAILED, MAXINT

        self.iteration_id += 1
        self.logger.info(
            'Iteration-%d, objective improvement: %.4f' % (self.iteration_id, max(0, self.default_obj_value - perf)))
        return config, trial_state, perf, trial_info

    def choose_next(self, X: np.ndarray, Y: np.ndarray):
        _config_num = X.shape[0]
        if _config_num < self.init_num:
            default_config = self.config_space.get_default_configuration()
            if default_config not in (self.configurations + self.failed_configurations):
                return default_config
            else:
                return self._random_search.maximize(runhistory=self.history_container, num_points=1)[0]

        if self.sample_strategy == 'random':
            return self.sample_config()
        elif self.sample_strategy == 'bo':
            if self.random_configuration_chooser.check(self.iteration_id):
                return self.sample_config()
            else:
                self.model.train(X, Y)

                incumbent_value = self.history_container.get_incumbents()[0][1]

                self.acquisition_function.update(model=self.model, eta=incumbent_value,
                                                 num_data=len(self.history_container.data))

                challengers = self.optimizer.maximize(
                    runhistory=self.history_container,
                    num_points=5000,
                    random_configuration_chooser=self.random_configuration_chooser
                )

                return challengers.challengers[0]
        else:
            raise ValueError('Invalid sampling strategy - %s.' % self.sample_strategy)

    def sample_config(self):
        config = None
        _sample_cnt, _sample_limit = 0, 10000
        while True:
            _sample_cnt += 1
            config = self.config_space.sample_configuration()
            if config not in (self.configurations + self.failed_configurations):
                break
            if _sample_cnt >= _sample_limit:
                config = self.config_space.sample_configuration()
                break
        return config
예제 #7
0
    def __init__(self,
                 objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 eta=3,
                 skip_outer_loop=0,
                 rand_prob=0.3,
                 use_bohb=False,
                 init_weight=None,
                 update_enable=True,
                 weight_method='rank_loss_p_norm',
                 fusion_method='idp',
                 power_num=3,
                 random_state=1,
                 method_id='mqAsyncMFES',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 seed=1,
                 ip='',
                 port=13579,
                 authkey=b'abc'):
        super().__init__(objective_func,
                         config_space,
                         R,
                         eta=eta,
                         skip_outer_loop=skip_outer_loop,
                         random_state=random_state,
                         method_id=method_id,
                         restart_needed=restart_needed,
                         time_limit_per_trial=time_limit_per_trial,
                         runtime_limit=runtime_limit,
                         ip=ip,
                         port=port,
                         authkey=authkey)
        self.seed = seed
        self.last_n_iteration = None
        self.use_bohb_strategy = use_bohb
        self.update_enable = update_enable
        self.fusion_method = fusion_method
        # Parameter for weight method `rank_loss_p_norm`.
        self.power_num = power_num
        # Specify the weight learning method.
        self.weight_method = weight_method
        self.weight_update_id = 0
        self.weight_changed_cnt = 0

        if init_weight is None:
            init_weight = [1. / self.s_max] * self.s_max + [0.]
            assert len(init_weight) == (self.s_max + 1)
        self.logger.info("Initialize weight to %s" %
                         init_weight[:self.s_max + 1])
        types, bounds = get_types(config_space)

        if not self.use_bohb_strategy:
            self.surrogate = RandomForestEnsemble(types, bounds, self.s_max,
                                                  self.eta, init_weight,
                                                  self.fusion_method)
        else:
            self.surrogate = RandomForestWithInstances(types,
                                                       bounds,
                                                       seed=self.seed)
        self.acquisition_function = EI(model=self.surrogate)

        self.iterate_id = 0
        self.iterate_r = list()
        self.hist_weights = list()

        # Saving evaluation statistics in Hyperband.
        self.target_x = dict()
        self.target_y = dict()
        for index, item in enumerate(
                np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = list()
            self.target_y[r] = list()

        # BO optimizer settings.
        self.history_container = HistoryContainer(task_id=self.method_name)
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.rng = np.random.RandomState(seed=self.seed)
        self.acq_optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=self.rng,
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations,
            rand_prob=0.0,
        )
        self.random_configuration_chooser = ChooserProb(prob=rand_prob,
                                                        rng=self.rng)
        self.random_check_idx = 0
예제 #8
0
class async_mqMFES(async_mqHyperband):
    """
    The implementation of Asynchronous MFES (combine ASHA and MFES)
    """
    def __init__(self,
                 objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 eta=3,
                 skip_outer_loop=0,
                 rand_prob=0.3,
                 use_bohb=False,
                 init_weight=None,
                 update_enable=True,
                 weight_method='rank_loss_p_norm',
                 fusion_method='idp',
                 power_num=3,
                 random_state=1,
                 method_id='mqAsyncMFES',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 seed=1,
                 ip='',
                 port=13579,
                 authkey=b'abc'):
        super().__init__(objective_func,
                         config_space,
                         R,
                         eta=eta,
                         skip_outer_loop=skip_outer_loop,
                         random_state=random_state,
                         method_id=method_id,
                         restart_needed=restart_needed,
                         time_limit_per_trial=time_limit_per_trial,
                         runtime_limit=runtime_limit,
                         ip=ip,
                         port=port,
                         authkey=authkey)
        self.seed = seed
        self.last_n_iteration = None
        self.use_bohb_strategy = use_bohb
        self.update_enable = update_enable
        self.fusion_method = fusion_method
        # Parameter for weight method `rank_loss_p_norm`.
        self.power_num = power_num
        # Specify the weight learning method.
        self.weight_method = weight_method
        self.weight_update_id = 0
        self.weight_changed_cnt = 0

        if init_weight is None:
            init_weight = [1. / self.s_max] * self.s_max + [0.]
            assert len(init_weight) == (self.s_max + 1)
        self.logger.info("Initialize weight to %s" %
                         init_weight[:self.s_max + 1])
        types, bounds = get_types(config_space)

        if not self.use_bohb_strategy:
            self.surrogate = RandomForestEnsemble(types, bounds, self.s_max,
                                                  self.eta, init_weight,
                                                  self.fusion_method)
        else:
            self.surrogate = RandomForestWithInstances(types,
                                                       bounds,
                                                       seed=self.seed)
        self.acquisition_function = EI(model=self.surrogate)

        self.iterate_id = 0
        self.iterate_r = list()
        self.hist_weights = list()

        # Saving evaluation statistics in Hyperband.
        self.target_x = dict()
        self.target_y = dict()
        for index, item in enumerate(
                np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = list()
            self.target_y[r] = list()

        # BO optimizer settings.
        self.history_container = HistoryContainer(task_id=self.method_name)
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.rng = np.random.RandomState(seed=self.seed)
        self.acq_optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=self.rng,
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations,
            rand_prob=0.0,
        )
        self.random_configuration_chooser = ChooserProb(prob=rand_prob,
                                                        rng=self.rng)
        self.random_check_idx = 0

    def update_observation(self, config, perf, n_iteration):
        rung_id = self.get_rung_id(self.bracket, n_iteration)

        updated = False
        for job in self.bracket[rung_id]['jobs']:
            _job_status, _config, _perf, _extra_conf = job
            if _config == config:
                assert _job_status == RUNNING
                job[0] = COMPLETED
                job[2] = perf
                updated = True
                break
        assert updated
        # print('=== bracket after update_observation:', self.get_bracket_status(self.bracket))

        configs_running = list()
        for _config in self.bracket[rung_id]['configs']:
            if _config not in self.target_x[n_iteration]:
                configs_running.append(_config)
        value_imputed = np.median(self.target_y[n_iteration])

        n_iteration = int(n_iteration)
        self.target_x[n_iteration].append(config)
        self.target_y[n_iteration].append(perf)

        if n_iteration == self.R:
            self.incumbent_configs.append(config)
            self.incumbent_perfs.append(perf)
            # Update history container.
            self.history_container.add(config, perf)

        # Refit the ensemble surrogate model.
        configs_train = self.target_x[n_iteration] + configs_running
        results_train = self.target_y[n_iteration] + [value_imputed
                                                      ] * len(configs_running)
        results_train = np.array(std_normalization(results_train),
                                 dtype=np.float64)
        if not self.use_bohb_strategy:
            self.surrogate.train(
                convert_configurations_to_array(configs_train),
                results_train,
                r=n_iteration)
        else:
            if n_iteration == self.R:
                self.surrogate.train(
                    convert_configurations_to_array(configs_train),
                    results_train)

    def choose_next(self):
        """
        sample a config according to MFES. give iterations according to Hyperband strategy.
        """
        next_config = None
        next_n_iteration = self.get_next_n_iteration()
        next_rung_id = self.get_rung_id(self.bracket, next_n_iteration)

        # Update weight when the inner loop of hyperband is finished
        if self.last_n_iteration != next_n_iteration and not self.use_bohb_strategy:
            if self.update_enable and self.weight_update_id > self.s_max:
                self.update_weight()
            self.weight_update_id += 1
        self.last_n_iteration = next_n_iteration

        # sample config
        excluded_configs = self.bracket[next_rung_id]['configs']
        if len(self.target_y[self.iterate_r[-1]]) == 0:
            next_config = sample_configuration(
                self.config_space, excluded_configs=excluded_configs)
        else:
            # Like BOHB, sample a fixed percentage of random configurations.
            self.random_check_idx += 1
            if self.random_configuration_chooser.check(self.random_check_idx):
                next_config = sample_configuration(
                    self.config_space, excluded_configs=excluded_configs)
            else:
                acq_configs = self.get_bo_candidates()
                for config in acq_configs:
                    if config not in self.bracket[next_rung_id]['configs']:
                        next_config = config
                        break
                if next_config is None:
                    self.logger.warning(
                        'Cannot get a non duplicate configuration from bo candidates. '
                        'Sample a random one.')
                    next_config = sample_configuration(
                        self.config_space, excluded_configs=excluded_configs)

        next_extra_conf = {}
        return next_config, next_n_iteration, next_extra_conf

    def get_bo_candidates(self):
        std_incumbent_value = np.min(
            std_normalization(self.target_y[self.iterate_r[-1]]))
        # Update surrogate model in acquisition function.
        self.acquisition_function.update(model=self.surrogate,
                                         eta=std_incumbent_value,
                                         num_data=len(self.incumbent_configs))

        challengers = self.acq_optimizer.maximize(
            runhistory=self.history_container,
            num_points=5000,
        )
        return challengers.challengers

    @staticmethod
    def calculate_preserving_order_num(y_pred, y_true):
        array_size = len(y_pred)
        assert len(y_true) == array_size

        total_pair_num, order_preserving_num = 0, 0
        for idx in range(array_size):
            for inner_idx in range(idx + 1, array_size):
                if bool(y_true[idx] > y_true[inner_idx]) == bool(
                        y_pred[idx] > y_pred[inner_idx]):
                    order_preserving_num += 1
                total_pair_num += 1
        return order_preserving_num, total_pair_num

    def update_weight(self):
        start_time = time.time()

        max_r = self.iterate_r[-1]
        incumbent_configs = self.target_x[max_r]
        test_x = convert_configurations_to_array(incumbent_configs)
        test_y = np.array(self.target_y[max_r], dtype=np.float64)

        r_list = self.surrogate.surrogate_r
        K = len(r_list)

        if len(test_y) >= 3:
            # Get previous weights
            if self.weight_method == 'rank_loss_p_norm':
                preserving_order_p = list()
                preserving_order_nums = list()
                for i, r in enumerate(r_list):
                    fold_num = 5
                    if i != K - 1:
                        mean, var = self.surrogate.surrogate_container[
                            r].predict(test_x)
                        tmp_y = np.reshape(mean, -1)
                        preorder_num, pair_num = self.calculate_preserving_order_num(
                            tmp_y, test_y)
                        preserving_order_p.append(preorder_num / pair_num)
                        preserving_order_nums.append(preorder_num)
                    else:
                        if len(test_y) < 2 * fold_num:
                            preserving_order_p.append(0)
                        else:
                            # 5-fold cross validation.
                            kfold = KFold(n_splits=fold_num)
                            cv_pred = np.array([0] * len(test_y))
                            for train_idx, valid_idx in kfold.split(test_x):
                                train_configs, train_y = test_x[
                                    train_idx], test_y[train_idx]
                                valid_configs, valid_y = test_x[
                                    valid_idx], test_y[valid_idx]
                                types, bounds = get_types(self.config_space)
                                _surrogate = RandomForestWithInstances(
                                    types=types, bounds=bounds)
                                _surrogate.train(train_configs, train_y)
                                pred, _ = _surrogate.predict(valid_configs)
                                cv_pred[valid_idx] = pred.reshape(-1)
                            preorder_num, pair_num = self.calculate_preserving_order_num(
                                cv_pred, test_y)
                            preserving_order_p.append(preorder_num / pair_num)
                            preserving_order_nums.append(preorder_num)

                trans_order_weight = np.array(preserving_order_p)
                power_sum = np.sum(np.power(trans_order_weight,
                                            self.power_num))
                new_weights = np.power(trans_order_weight,
                                       self.power_num) / power_sum

            elif self.weight_method == 'rank_loss_prob':
                # For basic surrogate i=1:K-1.
                mean_list, var_list = list(), list()
                for i, r in enumerate(r_list[:-1]):
                    mean, var = self.surrogate.surrogate_container[r].predict(
                        test_x)
                    mean_list.append(np.reshape(mean, -1))
                    var_list.append(np.reshape(var, -1))
                sample_num = 100
                min_probability_array = [0] * K
                for _ in range(sample_num):
                    order_preseving_nums = list()

                    # For basic surrogate i=1:K-1.
                    for idx in range(K - 1):
                        sampled_y = self.rng.normal(mean_list[idx],
                                                    var_list[idx])
                        _num, _ = self.calculate_preserving_order_num(
                            sampled_y, test_y)
                        order_preseving_nums.append(_num)

                    fold_num = 5
                    # For basic surrogate i=K. cv
                    if len(test_y) < 2 * fold_num:
                        order_preseving_nums.append(0)
                    else:
                        # 5-fold cross validation.
                        kfold = KFold(n_splits=fold_num)
                        cv_pred = np.array([0] * len(test_y))
                        for train_idx, valid_idx in kfold.split(test_x):
                            train_configs, train_y = test_x[train_idx], test_y[
                                train_idx]
                            valid_configs, valid_y = test_x[valid_idx], test_y[
                                valid_idx]
                            types, bounds = get_types(self.config_space)
                            _surrogate = RandomForestWithInstances(
                                types=types, bounds=bounds)
                            _surrogate.train(train_configs, train_y)
                            _pred, _var = _surrogate.predict(valid_configs)
                            sampled_pred = self.rng.normal(
                                _pred.reshape(-1), _var.reshape(-1))
                            cv_pred[valid_idx] = sampled_pred
                        _num, _ = self.calculate_preserving_order_num(
                            cv_pred, test_y)
                        order_preseving_nums.append(_num)
                    max_id = np.argmax(order_preseving_nums)
                    min_probability_array[max_id] += 1
                new_weights = np.array(min_probability_array) / sample_num
            else:
                raise ValueError('Invalid weight method: %s!' %
                                 self.weight_method)
        else:
            old_weights = list()
            for i, r in enumerate(r_list):
                _weight = self.surrogate.surrogate_weight[r]
                old_weights.append(_weight)
            new_weights = old_weights.copy()

        self.logger.info(
            '[%s] %d-th Updating weights: %s' %
            (self.weight_method, self.weight_changed_cnt, str(new_weights)))

        # Assign the weight to each basic surrogate.
        for i, r in enumerate(r_list):
            self.surrogate.surrogate_weight[r] = new_weights[i]
        self.weight_changed_cnt += 1
        # Save the weight data.
        self.hist_weights.append(new_weights)
        dir_path = os.path.join(self.data_directory, 'saved_weights')
        file_name = 'mfes_weights_%s.npy' % (self.method_name, )
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
        np.save(os.path.join(dir_path, file_name),
                np.asarray(self.hist_weights))
        self.logger.info(
            'update_weight() cost %.2fs. new weights are saved to %s' %
            (time.time() - start_time, os.path.join(dir_path, file_name)))

    def get_weights(self):
        return self.hist_weights
예제 #9
0
    def update_weight(self):
        start_time = time.time()

        max_r = self.iterate_r[-1]
        incumbent_configs = self.target_x[max_r]
        test_x = convert_configurations_to_array(incumbent_configs)
        test_y = np.array(self.target_y[max_r], dtype=np.float64)

        r_list = self.surrogate.surrogate_r
        K = len(r_list)

        if len(test_y) >= 3:
            # Get previous weights
            if self.weight_method == 'rank_loss_p_norm':
                preserving_order_p = list()
                preserving_order_nums = list()
                for i, r in enumerate(r_list):
                    fold_num = 5
                    if i != K - 1:
                        mean, var = self.surrogate.surrogate_container[
                            r].predict(test_x)
                        tmp_y = np.reshape(mean, -1)
                        preorder_num, pair_num = self.calculate_preserving_order_num(
                            tmp_y, test_y)
                        preserving_order_p.append(preorder_num / pair_num)
                        preserving_order_nums.append(preorder_num)
                    else:
                        if len(test_y) < 2 * fold_num:
                            preserving_order_p.append(0)
                        else:
                            # 5-fold cross validation.
                            kfold = KFold(n_splits=fold_num)
                            cv_pred = np.array([0] * len(test_y))
                            for train_idx, valid_idx in kfold.split(test_x):
                                train_configs, train_y = test_x[
                                    train_idx], test_y[train_idx]
                                valid_configs, valid_y = test_x[
                                    valid_idx], test_y[valid_idx]
                                types, bounds = get_types(self.config_space)
                                _surrogate = RandomForestWithInstances(
                                    types=types, bounds=bounds)
                                _surrogate.train(train_configs, train_y)
                                pred, _ = _surrogate.predict(valid_configs)
                                cv_pred[valid_idx] = pred.reshape(-1)
                            preorder_num, pair_num = self.calculate_preserving_order_num(
                                cv_pred, test_y)
                            preserving_order_p.append(preorder_num / pair_num)
                            preserving_order_nums.append(preorder_num)

                trans_order_weight = np.array(preserving_order_p)
                power_sum = np.sum(np.power(trans_order_weight,
                                            self.power_num))
                new_weights = np.power(trans_order_weight,
                                       self.power_num) / power_sum

            elif self.weight_method == 'rank_loss_prob':
                # For basic surrogate i=1:K-1.
                mean_list, var_list = list(), list()
                for i, r in enumerate(r_list[:-1]):
                    mean, var = self.surrogate.surrogate_container[r].predict(
                        test_x)
                    mean_list.append(np.reshape(mean, -1))
                    var_list.append(np.reshape(var, -1))
                sample_num = 100
                min_probability_array = [0] * K
                for _ in range(sample_num):
                    order_preseving_nums = list()

                    # For basic surrogate i=1:K-1.
                    for idx in range(K - 1):
                        sampled_y = self.rng.normal(mean_list[idx],
                                                    var_list[idx])
                        _num, _ = self.calculate_preserving_order_num(
                            sampled_y, test_y)
                        order_preseving_nums.append(_num)

                    fold_num = 5
                    # For basic surrogate i=K. cv
                    if len(test_y) < 2 * fold_num:
                        order_preseving_nums.append(0)
                    else:
                        # 5-fold cross validation.
                        kfold = KFold(n_splits=fold_num)
                        cv_pred = np.array([0] * len(test_y))
                        for train_idx, valid_idx in kfold.split(test_x):
                            train_configs, train_y = test_x[train_idx], test_y[
                                train_idx]
                            valid_configs, valid_y = test_x[valid_idx], test_y[
                                valid_idx]
                            types, bounds = get_types(self.config_space)
                            _surrogate = RandomForestWithInstances(
                                types=types, bounds=bounds)
                            _surrogate.train(train_configs, train_y)
                            _pred, _var = _surrogate.predict(valid_configs)
                            sampled_pred = self.rng.normal(
                                _pred.reshape(-1), _var.reshape(-1))
                            cv_pred[valid_idx] = sampled_pred
                        _num, _ = self.calculate_preserving_order_num(
                            cv_pred, test_y)
                        order_preseving_nums.append(_num)
                    max_id = np.argmax(order_preseving_nums)
                    min_probability_array[max_id] += 1
                new_weights = np.array(min_probability_array) / sample_num
            else:
                raise ValueError('Invalid weight method: %s!' %
                                 self.weight_method)
        else:
            old_weights = list()
            for i, r in enumerate(r_list):
                _weight = self.surrogate.surrogate_weight[r]
                old_weights.append(_weight)
            new_weights = old_weights.copy()

        self.logger.info(
            '[%s] %d-th Updating weights: %s' %
            (self.weight_method, self.weight_changed_cnt, str(new_weights)))

        # Assign the weight to each basic surrogate.
        for i, r in enumerate(r_list):
            self.surrogate.surrogate_weight[r] = new_weights[i]
        self.weight_changed_cnt += 1
        # Save the weight data.
        self.hist_weights.append(new_weights)
        dir_path = os.path.join(self.data_directory, 'saved_weights')
        file_name = 'mfes_weights_%s.npy' % (self.method_name, )
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
        np.save(os.path.join(dir_path, file_name),
                np.asarray(self.hist_weights))
        self.logger.info(
            'update_weight() cost %.2fs. new weights are saved to %s' %
            (time.time() - start_time, os.path.join(dir_path, file_name)))