Ejemplo n.º 1
0
    def __call__(self, configurations: Union[List[Configuration], np.ndarray], convert=True, **kwargs):
        """Computes the acquisition value for a given X.

        Parameters
        ----------
        configurations : Union[List[Configuration], np.ndarray]
            The configurations where the acquisition function
            should be evaluated.
        convert : bool
            If True, convert the configuration objects to a 2d array first.
            Pass False when `configurations` is already an array (e.g. when a
            multi-objective acquisition function calls a single-objective one).

        Returns
        -------
        np.ndarray(N, 1)
            acquisition values for X
        """
        if convert:
            X = convert_configurations_to_array(configurations)
        else:
            X = configurations  # to be compatible with multi-objective acq to call single acq
        if len(X.shape) == 1:
            # Promote a single configuration vector to a (1, D) batch.
            X = X[np.newaxis, :]

        acq = self._compute(X, **kwargs)
        if np.any(np.isnan(acq)):
            # Replace NaN acquisition values with the most negative finite
            # float so these points are never picked by the maximizer.
            # Note: `np.float` was removed in NumPy 1.24; use builtin `float`.
            idx = np.where(np.isnan(acq))[0]
            acq[idx, :] = -np.finfo(float).max
        return acq
Ejemplo n.º 2
0
    def update_observation(self, config, perf, n_iteration):
        """Record the result of a finished configuration and refit the surrogate.

        Parameters
        ----------
        config
            The configuration whose evaluation just completed.
        perf : float
            Observed performance (loss) of `config` at this budget.
        n_iteration : int or float
            Resource level (budget) the configuration was evaluated at.
        """
        rung_id = self.get_rung_id(self.bracket, n_iteration)

        updated = False
        for job in self.bracket[rung_id]['jobs']:
            # A job is a mutable list: [status, config, perf, extra_conf].
            _job_status, _config, _perf, _extra_conf = job
            if _config == config:
                assert _job_status == RUNNING
                # Mark the job finished in place.
                job[0] = COMPLETED
                job[2] = perf
                updated = True
                break
        assert updated
        # print('=== bracket after update_observation:', self.get_bracket_status(self.bracket))

        # Configs in this rung that have not reported a result yet.
        configs_running = list()
        for _config in self.bracket[rung_id]['configs']:
            if _config not in self.target_x[n_iteration]:
                configs_running.append(_config)
        # Impute still-running configs with the median observed value at this budget.
        value_imputed = np.median(self.target_y[n_iteration])

        n_iteration = int(n_iteration)
        self.target_x[n_iteration].append(config)
        self.target_y[n_iteration].append(perf)

        if n_iteration == self.R:
            # Full-budget results are candidates for the incumbent.
            self.incumbent_configs.append(config)
            self.incumbent_perfs.append(perf)
            # Update history container.
            self.history_container.add(config, perf)

        # Refit the ensemble surrogate model.
        configs_train = self.target_x[n_iteration] + configs_running
        results_train = self.target_y[n_iteration] + [value_imputed
                                                      ] * len(configs_running)
        results_train = np.array(std_normalization(results_train),
                                 dtype=np.float64)
        if not self.use_bohb_strategy:
            self.surrogate.train(
                convert_configurations_to_array(configs_train),
                results_train,
                r=n_iteration)
        else:
            # BOHB strategy: only fit the surrogate on full-budget (R) data.
            if n_iteration == self.R:
                self.surrogate.train(
                    convert_configurations_to_array(configs_train),
                    results_train)
Ejemplo n.º 3
0
    def get_suggestion(self, history_container=None):
        """Generate one configuration suggestion while other configs are running.

        Falls back to initial/random designs when there is not enough data,
        then applies the configured batch strategy:

        - 'median_imputation': impute each running config with the median of
          observed objectives/constraints in a deep-copied history, then
          delegate to the super class.
        - 'local_penalization': penalize the acquisition function around the
          running configs (single objective, no constraints only).

        The chosen configuration is appended to ``self.running_configs``.
        """
        self.logger.info('#Call get_suggestion. len of running configs = %d.' % len(self.running_configs))

        if history_container is None:
            history_container = self.history_container

        # Running configs count towards the total so the initial design is not repeated.
        num_config_all = len(history_container.configurations) + len(self.running_configs)
        num_config_successful = len(history_container.successful_perfs)

        if (num_config_all < self.init_num) or \
                num_config_successful < self.bo_start_n or \
                self.optimization_strategy == 'random':
            if num_config_all >= len(self.initial_configurations):
                _config = self.sample_random_configs(1, history_container)[0]
            else:
                _config = self.initial_configurations[num_config_all]
            self.running_configs.append(_config)
            return _config

        X = convert_configurations_to_array(history_container.configurations)
        Y = history_container.get_transformed_perfs()
        # cY = history_container.get_transformed_constraint_perfs()

        if self.batch_strategy == 'median_imputation':
            # set bilog_transform=False to get real cY for estimating median
            cY = history_container.get_transformed_constraint_perfs(bilog_transform=False)

            estimated_y = np.median(Y, axis=0).reshape(-1).tolist()
            estimated_c = np.median(cY, axis=0).tolist() if self.num_constraints > 0 else None
            # Deep copy so the imputed observations never touch the real history.
            batch_history_container = copy.deepcopy(history_container)
            # imputation
            for config in self.running_configs:
                observation = Observation(config, SUCCESS, estimated_c, estimated_y, None)
                batch_history_container.update_observation(observation)

            # use super class get_suggestion
            _config = super().get_suggestion(batch_history_container)

        elif self.batch_strategy == 'local_penalization':
            # local_penalization only supports single objective with no constraint
            self.surrogate_model.train(X, Y)
            incumbent_value = history_container.get_incumbents()[0][1]
            self.acquisition_function.update(model=self.surrogate_model, eta=incumbent_value,
                                             num_data=len(history_container.data),
                                             batch_configs=self.running_configs)

            challengers = self.optimizer.maximize(
                runhistory=history_container,
                num_points=5000
            )
            _config = challengers.challengers[0]
        else:
            raise ValueError('Invalid sampling strategy - %s.' % self.batch_strategy)

        self.running_configs.append(_config)
        return _config
Ejemplo n.º 4
0
    def fit_kde_models(self, history_container):
        """Fit TPE/BOHB-style 'good'/'bad' KDE density models from history.

        The top `top_n_percent` configurations (by transformed loss) form the
        'good' density and the next `n_bad` the 'bad' density. The fitted
        models are stored in ``self.kde_models``; nothing is fitted when there
        is not enough data.

        Parameters
        ----------
        history_container
            History with `configurations`, `successful_perfs` and
            `get_transformed_perfs()`.
        """
        num_config_successful = len(history_container.successful_perfs)
        if num_config_successful <= self.min_points_in_model - 1:
            # Bug fix: the guard requires at least `min_points_in_model`
            # successful runs, but the old message claimed
            # `min_points_in_model + 1` were needed.
            self.logger.debug(
                "Only %i run(s) available, need at least %s -> can't build model!"
                % (num_config_successful, self.min_points_in_model))
            return

        train_configs = convert_configurations_to_array(
            history_container.configurations)
        train_losses = history_container.get_transformed_perfs()

        # Split sizes: each side gets at least `min_points_in_model` samples.
        n_good = max(self.min_points_in_model,
                     (self.top_n_percent * train_configs.shape[0]) // 100)
        # n_bad = min(max(self.min_points_in_model, ((100-self.top_n_percent)*train_configs.shape[0])//100), 10)
        n_bad = max(self.min_points_in_model,
                    ((100 - self.top_n_percent) * train_configs.shape[0]) //
                    100)

        # Refit KDE for the current budget
        idx = np.argsort(train_losses)

        train_data_good = self.impute_conditional_data(
            train_configs[idx[:n_good]])
        train_data_bad = self.impute_conditional_data(
            train_configs[idx[n_good:n_good + n_bad]])

        # A KDE needs more samples than dimensions to be well-defined.
        if train_data_good.shape[0] <= train_data_good.shape[1]:
            return
        if train_data_bad.shape[0] <= train_data_bad.shape[1]:
            return

        # more expensive crossvalidation method
        # bw_estimation = 'cv_ls'

        # quick rule of thumb
        bw_estimation = 'normal_reference'

        bad_kde = sm.nonparametric.KDEMultivariate(data=train_data_bad,
                                                   var_type=self.kde_vartypes,
                                                   bw=bw_estimation)
        good_kde = sm.nonparametric.KDEMultivariate(data=train_data_good,
                                                    var_type=self.kde_vartypes,
                                                    bw=bw_estimation)

        # Clip bandwidths from below to avoid degenerate, overly spiky densities.
        bad_kde.bw = np.clip(bad_kde.bw, self.min_bandwidth, None)
        good_kde.bw = np.clip(good_kde.bw, self.min_bandwidth, None)

        self.kde_models = {'good': good_kde, 'bad': bad_kde}

        # update probs for the categorical parameters for later sampling
        self.logger.debug(
            'done building a new model based on %i/%i split\nBest loss for this budget:%f\n\n\n\n\n'
            % (n_good, n_bad, np.min(train_losses)))
Ejemplo n.º 5
0
 def get_regressor(self):
     """Train one residual-GP transfer-learning regressor per source task."""
     for idx, hpo_evaluation_data in enumerate(self.source_hpo_data):
         print('Build the %d-th residual GPs.' % idx)
         # Keep at most `num_src_hpo_trial` (config, perf) observations.
         observations = list(hpo_evaluation_data.items())[:self.num_src_hpo_trial]
         configs = [conf for conf, _ in observations]
         perfs = [perf for _, perf in observations]
         self.train_regressor(convert_configurations_to_array(configs),
                              np.array(perfs, dtype=np.float64))
Ejemplo n.º 6
0
    def get_suggestion(self):
        """Suggest the next configuration, treating failed trials as worst-case.

        Failed configurations are only included in the training data when
        ``self.max_y`` is set, in which case their performance is imputed with
        ``self.max_y``. Falls back to the initial design / random sampling
        before BO starts; in BO mode, returns the best non-duplicate
        challenger from the acquisition optimizer.
        """
        if len(self.configurations) == 0:
            X = np.array([])
        else:
            # Include failed configs only when a worst-case value is available.
            failed_configs = list(
            ) if self.max_y is None else self.failed_configurations.copy()
            X = convert_configurations_to_array(self.configurations +
                                                failed_configs)

        num_failed_trial = len(self.failed_configurations)
        # Impute failed trials with the worst observed/declared value max_y.
        failed_perfs = list() if self.max_y is None else [self.max_y
                                                          ] * num_failed_trial
        Y = np.array(self.perfs + failed_perfs, dtype=np.float64)

        # Total trial count (successful + failed), used to index the initial design.
        num_config_evaluated = len(self.perfs + self.failed_configurations)
        if num_config_evaluated < self.init_num:
            return self.initial_configurations[num_config_evaluated]

        if self.optimization_strategy == 'random':
            return self.sample_random_configs(1)[0]
        elif self.optimization_strategy == 'bo':
            self.surrogate_model.train(X, Y)
            incumbent_value = self.history_container.get_incumbents()[0][1]
            self.acquisition_function.update(model=self.surrogate_model,
                                             eta=incumbent_value,
                                             num_data=num_config_evaluated)
            challengers = self.optimizer.maximize(
                runhistory=self.history_container, num_points=5000)
            # Walk down the ranked challengers until an unseen config is found.
            is_repeated_config = True
            repeated_time = 0
            cur_config = None
            while is_repeated_config:
                cur_config = challengers.challengers[repeated_time]
                if cur_config in (self.configurations +
                                  self.failed_configurations):
                    repeated_time += 1
                else:
                    is_repeated_config = False
            return cur_config
        else:
            raise ValueError('Unknown optimization strategy: %s.' %
                             self.optimization_strategy)
Ejemplo n.º 7
0
    def build_source_surrogates(self, normalize):
        """Train one surrogate per source HPO task for transfer learning.

        Parameters
        ----------
        normalize : str
            Target normalization: 'standardize' (zero mean, unit variance) or
            'scale' (min-max scaled, then mapped to [-1, 1]). Any other value
            raises ValueError.
        """
        if self.source_hpo_data is None:
            self.logger.warning(
                'No history BO data provided, resort to naive BO optimizer without TL.'
            )
            return

        self.logger.info('Start to train base surrogates.')
        start_time = time.time()
        self.source_surrogates = list()
        for hpo_evaluation_data in self.source_hpo_data:
            print('.', end='')
            model = build_surrogate(self.surrogate_type, self.config_space,
                                    np.random.RandomState(self.random_seed))
            _X, _y = list(), list()
            for _config, _config_perf in hpo_evaluation_data.items():
                _X.append(_config)
                _y.append(_config_perf)
            X = convert_configurations_to_array(_X)
            y = np.array(_y, dtype=np.float64)
            # Optionally truncate the source data to num_src_hpo_trial points.
            if self.num_src_hpo_trial != -1:
                X = X[:self.num_src_hpo_trial]
                y = y[:self.num_src_hpo_trial]

            if normalize == 'standardize':
                # Jitter constant targets to avoid zero variance.
                if (y == y[0]).all():
                    y[0] += 1e-4
                y, _, _ = zero_mean_unit_var_normalization(y)
            elif normalize == 'scale':
                if (y == y[0]).all():
                    y[0] += 1e-4
                y, _, _ = zero_one_normalization(y)
                # Map [0, 1] to [-1, 1].
                y = 2 * y - 1.
            else:
                raise ValueError('Invalid parameter in norm.')

            # Keep each source task's best (normalized) value.
            self.eta_list.append(np.min(y))
            model.train(X, y)
            self.source_surrogates.append(model)
        self.logger.info('Building base surrogates took %.3fs.' %
                         (time.time() - start_time))
Ejemplo n.º 8
0
    def _get_suggestion(self, history_container=None):
        """Generate one suggestion for parallel/batch optimization.

        Falls back to the initial design or random sampling when data is
        scarce (running configs count towards the total), then applies the
        batch strategy: 'median_imputation' (impute running configs in a
        copied history and delegate to super), 'local_penalization'
        (penalized acquisition, single-objective only), or 'default' (first
        BO candidate not already running/evaluated).
        """
        if history_container is None:
            history_container = self.history_container

        num_config_all = len(history_container.configurations) + len(
            self.running_configs)
        num_config_successful = len(history_container.successful_perfs)

        if (num_config_all < self.init_num) or \
                num_config_successful < self.bo_start_n or \
                self.optimization_strategy == 'random':
            if num_config_all >= len(self.initial_configurations):
                _config = self.sample_random_configs(1, history_container)[0]
            else:
                _config = self.initial_configurations[num_config_all]
            return _config

        # sample random configuration proportionally
        if self.rng.random() < self.rand_prob:
            self.logger.info('Sample random config. rand_prob=%f.' %
                             self.rand_prob)
            return self.sample_random_configs(
                1, history_container, excluded_configs=self.running_configs)[0]

        X = convert_configurations_to_array(history_container.configurations)
        Y = history_container.get_transformed_perfs(transform=None)
        # cY = history_container.get_transformed_constraint_perfs(transform='bilog')

        if self.batch_strategy == 'median_imputation':
            # set bilog_transform=False to get real cY for estimating median
            cY = history_container.get_transformed_constraint_perfs(
                transform=None)

            estimated_y = np.median(Y, axis=0).reshape(-1).tolist()
            estimated_c = np.median(
                cY, axis=0).tolist() if self.num_constraints > 0 else None
            # Deep copy so the imputed observations never touch the real history.
            batch_history_container = copy.deepcopy(history_container)
            # imputation
            for config in self.running_configs:
                observation = Observation(config=config,
                                          objs=estimated_y,
                                          constraints=estimated_c,
                                          trial_state=SUCCESS,
                                          elapsed_time=None)
                batch_history_container.update_observation(observation)

            # use super class get_suggestion
            return super().get_suggestion(batch_history_container)

        elif self.batch_strategy == 'local_penalization':
            # local_penalization only supports single objective with no constraint
            self.surrogate_model.train(X, Y)
            incumbent_value = history_container.get_incumbents()[0][1]
            self.acquisition_function.update(
                model=self.surrogate_model,
                eta=incumbent_value,
                num_data=len(history_container.data),
                batch_configs=self.running_configs)

            challengers = self.optimizer.maximize(runhistory=history_container,
                                                  num_points=5000)
            return challengers.challengers[0]

        elif self.batch_strategy == 'default':
            # select first N candidates
            candidates = super().get_suggestion(history_container,
                                                return_list=True)

            # Return the first candidate that is neither running nor evaluated.
            for config in candidates:
                if config not in self.running_configs and config not in history_container.configurations:
                    return config

            self.logger.warning(
                'Cannot get non duplicate configuration from BO candidates (len=%d). '
                'Sample random config.' % (len(candidates), ))
            return self.sample_random_configs(
                1, history_container, excluded_configs=self.running_configs)[0]
        else:
            raise ValueError('Invalid sampling strategy - %s.' %
                             self.batch_strategy)
Ejemplo n.º 9
0
    def get_suggestion(self, history_container=None):
        """
        Generate a configuration (suggestion) for this query.

        Supports single- and multi-objective BO with optional constraints:
        trains the surrogate(s) and constraint models, updates the
        acquisition function according to `acq_type` (default / parego /
        ehvi* / MESMOC-style), then returns the first non-duplicate
        challenger from the acquisition optimizer.

        Parameters
        ----------
        history_container : optional
            History to use; defaults to ``self.history_container``.

        Returns
        -------
        A configuration.
        """
        if history_container is None:
            history_container = self.history_container

        num_config_evaluated = len(history_container.configurations)
        num_config_successful = len(history_container.successful_perfs)

        if num_config_evaluated < self.init_num:
            return self.initial_configurations[num_config_evaluated]

        if self.optimization_strategy == 'random':
            return self.sample_random_configs(1, history_container)[0]

        X = convert_configurations_to_array(history_container.configurations)
        Y = history_container.get_transformed_perfs()
        cY = history_container.get_transformed_constraint_perfs()

        if self.optimization_strategy == 'bo':
            if num_config_successful < max(self.init_num, 1):
                self.logger.warning('No enough successful initial trials! Sample random configuration.')
                return self.sample_random_configs(1, history_container)[0]

            # train surrogate model
            if self.num_objs == 1:
                self.surrogate_model.train(X, Y)
            elif self.acq_type == 'parego':
                # ParEGO: scalarize multiple objectives with random Chebyshev weights.
                weights = self.rng.random_sample(self.num_objs)
                weights = weights / np.sum(weights)
                scalarized_obj = get_chebyshev_scalarization(weights, Y)
                self.surrogate_model.train(X, scalarized_obj(Y))
            else:  # multi-objectives
                for i in range(self.num_objs):
                    self.surrogate_model[i].train(X, Y[:, i])

            # train constraint model
            for i in range(self.num_constraints):
                self.constraint_models[i].train(X, cY[:, i])

            # update acquisition function
            if self.num_objs == 1:
                incumbent_value = history_container.get_incumbents()[0][1]
                self.acquisition_function.update(model=self.surrogate_model,
                                                 constraint_models=self.constraint_models,
                                                 eta=incumbent_value,
                                                 num_data=num_config_evaluated)
            else:  # multi-objectives
                mo_incumbent_value = history_container.get_mo_incumbent_value()
                if self.acq_type == 'parego':
                    self.acquisition_function.update(model=self.surrogate_model,
                                                     constraint_models=self.constraint_models,
                                                     eta=scalarized_obj(np.atleast_2d(mo_incumbent_value)),
                                                     num_data=num_config_evaluated)
                elif self.acq_type.startswith('ehvi'):
                    # EHVI needs the non-dominated cell decomposition w.r.t. the ref point.
                    partitioning = NondominatedPartitioning(self.num_objs, Y)
                    cell_bounds = partitioning.get_hypercell_bounds(ref_point=self.ref_point)
                    self.acquisition_function.update(model=self.surrogate_model,
                                                     constraint_models=self.constraint_models,
                                                     cell_lower_bounds=cell_bounds[0],
                                                     cell_upper_bounds=cell_bounds[1])
                else:
                    self.acquisition_function.update(model=self.surrogate_model,
                                                     constraint_models=self.constraint_models,
                                                     constraint_perfs=cY,  # for MESMOC
                                                     eta=mo_incumbent_value,
                                                     num_data=num_config_evaluated,
                                                     X=X, Y=Y)

            # optimize acquisition function
            challengers = self.optimizer.maximize(runhistory=history_container,
                                                  num_points=5000)
            # Walk down the ranked challengers until an unseen config is found.
            is_repeated_config = True
            repeated_time = 0
            cur_config = None
            while is_repeated_config:
                cur_config = challengers.challengers[repeated_time]  # todo: test small space
                if cur_config in history_container.configurations:
                    repeated_time += 1
                else:
                    is_repeated_config = False
            return cur_config
        else:
            raise ValueError('Unknown optimization strategy: %s.' % self.optimization_strategy)
Ejemplo n.º 10
0
    def get_suggestion(self, history_container=None):
        """Generate a suggestion using Monte-Carlo acquisitions, optionally
        with a TuRBO-style trust region.

        When the trust region triggers a restart, the history is reset before
        suggesting. Supports MC-EI (single objective), MC-ParEGO and MC-EHVI
        (multi-objective), with optional constraint models.
        """
        if history_container is None:
            history_container = self.history_container

        # Check if turbo needs to be restarted
        if self.use_trust_region and self.turbo_state.restart_triggered:
            history_container.restart()
            print('-' * 30)
            print('Restart!')

        num_config_evaluated = len(history_container.configurations)
        num_config_successful = len(history_container.successful_perfs)

        if num_config_evaluated < self.init_num:
            return self.initial_configurations[num_config_evaluated]

        if self.optimization_strategy == 'random':
            return self.sample_random_configs(1)[0]

        X = convert_configurations_to_array(history_container.configurations)
        Y = history_container.get_transformed_perfs(transform=None)
        cY = history_container.get_transformed_constraint_perfs(
            transform='bilog')

        if self.optimization_strategy == 'bo':
            if num_config_successful < max(self.init_num, 1):
                self.logger.warning(
                    'No enough successful initial trials! Sample random configuration.'
                )
                return self.sample_random_configs(1)[0]

            # train surrogate model
            if self.num_objs == 1:
                self.surrogate_model.train(X, Y)
            else:  # multi-objectives
                for i in range(self.num_objs):
                    self.surrogate_model[i].train(X, Y[:, i])

            # train constraint model
            for i in range(self.num_constraints):
                self.constraint_models[i].train(X, cY[:, i])

            # update acquisition function
            if self.num_objs == 1:  # MC-EI
                incumbent_value = history_container.get_incumbents()[0][1]
                self.acquisition_function.update(
                    model=self.surrogate_model,
                    constraint_models=self.constraint_models,
                    eta=incumbent_value)
            else:  # MC-ParEGO or MC-EHVI
                if self.acq_type.startswith('mcparego'):
                    self.acquisition_function.update(
                        model=self.surrogate_model,
                        constraint_models=self.constraint_models)
                elif self.acq_type.startswith('mcehvi'):
                    # EHVI needs the non-dominated cell decomposition w.r.t. the ref point.
                    partitioning = NondominatedPartitioning(self.num_objs, Y)
                    cell_bounds = partitioning.get_hypercell_bounds(
                        ref_point=self.ref_point)
                    self.acquisition_function.update(
                        model=self.surrogate_model,
                        constraint_models=self.constraint_models,
                        cell_lower_bounds=cell_bounds[0],
                        cell_upper_bounds=cell_bounds[1])

            # optimize acquisition function
            challengers = self.optimizer.maximize(runhistory=history_container,
                                                  num_points=5000,
                                                  turbo_state=self.turbo_state)
            # Walk down the ranked challengers until an unseen config is found.
            is_repeated_config = True
            repeated_time = 0
            cur_config = None
            while is_repeated_config:
                cur_config = challengers.challengers[repeated_time]
                if cur_config in history_container.configurations:
                    repeated_time += 1
                else:
                    is_repeated_config = False
            return cur_config
        else:
            raise ValueError('Unknown optimization strategy: %s.' %
                             self.optimization_strategy)
Ejemplo n.º 11
0
    def get_suggestion(self, history_container=None, return_list=False):
        """
        Generate a configuration (suggestion) for this query.

        Parameters
        ----------
        history_container : optional
            History to use; defaults to ``self.history_container``.
        return_list : bool
            If True, return the full ranked list of BO challengers instead of
            a single configuration; random interleaving via ``rand_prob`` is
            skipped in that case.

        Returns
        -------
        A configuration, or a list of challenger configurations when
        ``return_list`` is True.
        """
        if history_container is None:
            history_container = self.history_container

        self.alter_model(history_container)

        num_config_evaluated = len(history_container.configurations)
        num_config_successful = len(history_container.successful_perfs)

        if num_config_evaluated < self.init_num:
            return self.initial_configurations[num_config_evaluated]

        if self.optimization_strategy == 'random':
            return self.sample_random_configs(1, history_container)[0]

        # Random interleaving: with probability rand_prob, skip BO entirely.
        if (not return_list) and self.rng.random() < self.rand_prob:
            self.logger.info('Sample random config. rand_prob=%f.' %
                             self.rand_prob)
            return self.sample_random_configs(1, history_container)[0]

        X = convert_configurations_to_array(history_container.configurations)
        Y = history_container.get_transformed_perfs(transform=None)
        cY = history_container.get_transformed_constraint_perfs(
            transform='bilog')

        if self.optimization_strategy == 'bo':
            if num_config_successful < max(self.init_num, 1):
                self.logger.warning(
                    'No enough successful initial trials! Sample random configuration.'
                )
                return self.sample_random_configs(1, history_container)[0]

            # train surrogate model
            if self.num_objs == 1:
                self.surrogate_model.train(X, Y)
            elif self.acq_type == 'parego':
                # ParEGO: scalarize multiple objectives with random Chebyshev weights.
                weights = self.rng.random_sample(self.num_objs)
                weights = weights / np.sum(weights)
                scalarized_obj = get_chebyshev_scalarization(weights, Y)
                self.surrogate_model.train(X, scalarized_obj(Y))
            else:  # multi-objectives
                for i in range(self.num_objs):
                    self.surrogate_model[i].train(X, Y[:, i])

            # train constraint model
            for i in range(self.num_constraints):
                self.constraint_models[i].train(X, cY[:, i])

            # update acquisition function
            if self.num_objs == 1:
                incumbent_value = history_container.get_incumbents()[0][1]
                self.acquisition_function.update(
                    model=self.surrogate_model,
                    constraint_models=self.constraint_models,
                    eta=incumbent_value,
                    num_data=num_config_evaluated)
            else:  # multi-objectives
                mo_incumbent_value = history_container.get_mo_incumbent_value()
                if self.acq_type == 'parego':
                    self.acquisition_function.update(
                        model=self.surrogate_model,
                        constraint_models=self.constraint_models,
                        eta=scalarized_obj(np.atleast_2d(mo_incumbent_value)),
                        num_data=num_config_evaluated)
                elif self.acq_type.startswith('ehvi'):
                    # EHVI needs the non-dominated cell decomposition w.r.t. the ref point.
                    partitioning = NondominatedPartitioning(self.num_objs, Y)
                    cell_bounds = partitioning.get_hypercell_bounds(
                        ref_point=self.ref_point)
                    self.acquisition_function.update(
                        model=self.surrogate_model,
                        constraint_models=self.constraint_models,
                        cell_lower_bounds=cell_bounds[0],
                        cell_upper_bounds=cell_bounds[1])
                else:
                    self.acquisition_function.update(
                        model=self.surrogate_model,
                        constraint_models=self.constraint_models,
                        constraint_perfs=cY,  # for MESMOC
                        eta=mo_incumbent_value,
                        num_data=num_config_evaluated,
                        X=X,
                        Y=Y)

            # optimize acquisition function
            challengers = self.optimizer.maximize(runhistory=history_container,
                                                  num_points=5000)
            if return_list:
                # Caution: return_list doesn't contain random configs sampled according to rand_prob
                return challengers.challengers

            for config in challengers.challengers:
                if config not in history_container.configurations:
                    return config
            self.logger.warning(
                'Cannot get non duplicate configuration from BO candidates (len=%d). '
                'Sample random config.' % (len(challengers.challengers), ))
            return self.sample_random_configs(1, history_container)[0]
        else:
            raise ValueError('Unknown optimization strategy: %s.' %
                             self.optimization_strategy)
Ejemplo n.º 12
0
    def iterate(self, skip_last=0):
        """Run one full Hyperband-style pass over all brackets.

        For each bracket ``s`` (from ``s_max`` down to 0): choose ``n``
        configurations, evaluate them at successively larger budgets, and
        keep the best ``1/eta`` fraction after each rung. Full-budget (R)
        results update the incumbents and the history container; afterwards
        the weighted surrogate is refit for every budget visited in the
        bracket.

        Parameters
        ----------
        skip_last : int
            Number of final rungs to skip in each bracket.
        """

        for s in reversed(range(self.s_max + 1)):

            # Periodically refresh the ensemble weights once enough brackets ran.
            if self.update_enable and self.weight_update_id > self.s_max:
                self.update_weight()
            self.weight_update_id += 1

            # Set initial number of configurations
            n = int(ceil(self.B / self.R / (s + 1) * self.eta ** s))
            # initial number of iterations per config
            r = int(self.R * self.eta ** (-s))

            # Choose a batch of configurations in different mechanisms.
            start_time = time.time()
            T = self.choose_next(n)
            time_elapsed = time.time() - start_time
            self.logger.info("[%s] Choosing next configurations took %.2f sec." % (self.method_name, time_elapsed))

            extra_info = None
            last_run_num = None

            for i in range((s + 1) - int(skip_last)):  # changed from s + 1

                # Run each of the n configs for <iterations>
                # and keep best (n_configs / eta) configurations

                n_configs = n * self.eta ** (-i)
                n_iteration = r * self.eta ** (i)

                # Without restarts, only run the budget delta since the last rung.
                n_iter = n_iteration
                if last_run_num is not None and not self.restart_needed:
                    n_iter -= last_run_num
                last_run_num = n_iteration

                self.logger.info("%s: %d configurations x %d iterations each" %
                                 (self.method_name, int(n_configs), int(n_iteration)))

                ret_val, early_stops = self.run_in_parallel(T, n_iter, extra_info)
                val_losses = [item['loss'] for item in ret_val]
                ref_list = [item['ref_id'] for item in ret_val]

                self.target_x[int(n_iteration)].extend(T)
                self.target_y[int(n_iteration)].extend(val_losses)

                if int(n_iteration) == self.R:
                    # Full-budget results are candidates for the incumbent.
                    self.incumbent_configs.extend(T)
                    self.incumbent_perfs.extend(val_losses)
                    # Update history container.
                    for _config, _perf in zip(T, val_losses):
                        self.history_container.add(_config, _perf)

                # Select a number of best configurations for the next loop.
                # Filter out early stops, if any.
                indices = np.argsort(val_losses)
                if len(T) == sum(early_stops):
                    # Everything early-stopped; abandon the rest of this bracket.
                    break
                if len(T) >= self.eta:
                    indices = [i for i in indices if not early_stops[i]]
                    T = [T[i] for i in indices]
                    extra_info = [ref_list[i] for i in indices]
                    reduced_num = int(n_configs / self.eta)
                    T = T[0:reduced_num]
                    extra_info = extra_info[0:reduced_num]
                else:
                    T = [T[indices[0]]]     # todo: confirm no filter early stops?
                    extra_info = [ref_list[indices[0]]]
                val_losses = [val_losses[i] for i in indices][0:len(T)]  # update: sorted
                incumbent_loss = val_losses[0]
                self.add_stage_history(self.stage_id, min(self.global_incumbent, incumbent_loss))
                self.stage_id += 1
            # self.remove_immediate_model()

            # Refit the weighted surrogate for every budget visited in this bracket.
            for item in self.iterate_r[self.iterate_r.index(r):]:
                # NORMALIZE Objective value: normalization
                normalized_y = std_normalization(self.target_y[item])
                self.weighted_surrogate.train(convert_configurations_to_array(self.target_x[item]),
                                              np.array(normalized_y, dtype=np.float64), r=item)
Ejemplo n.º 13
0
    def get_suggestions(self, batch_size=None, history_container=None):
        """Suggest a batch of ``batch_size`` configurations to evaluate next.

        Falls back to the initial design / random sampling until enough
        trials have finished, then expands single-point suggestions into a
        batch according to ``self.batch_strategy``:

        * ``'median_imputation'``: after each pick, fantasize its outcome
          with the median of the observed objectives (and constraints) so
          the next pick is conditioned on it.
        * ``'local_penalization'``: re-update the acquisition function with
          the already-picked configs (``batch_configs``) before each pick.
        * ``'reoptimization'``: re-run the acquisition optimizer after each
          pick and take the first non-duplicate candidate.
        * ``'default'``: take the first ``batch_size`` non-duplicate
          candidates from a single acquisition optimization.

        Parameters
        ----------
        batch_size : int, optional
            Number of configurations to return. Defaults to
            ``self.batch_size``; must be >= 1.
        history_container : optional
            Observation history to condition on. Defaults to
            ``self.history_container``.

        Returns
        -------
        list
            The suggested configurations.

        Raises
        ------
        ValueError
            If ``self.batch_strategy`` is not one of the supported values.
        """
        if batch_size is None:
            batch_size = self.batch_size
        assert batch_size >= 1
        if history_container is None:
            history_container = self.history_container

        num_config_evaluated = len(history_container.configurations)
        num_config_successful = len(history_container.successful_perfs)

        # Initial design phase: replay user-provided configurations first,
        # topping up with random samples if fewer than batch_size remain.
        if num_config_evaluated < self.init_num:
            if self.initial_configurations is not None:  # self.init_num equals to len(self.initial_configurations)
                next_configs = self.initial_configurations[
                    num_config_evaluated:num_config_evaluated + batch_size]
                if len(next_configs) < batch_size:
                    next_configs.extend(
                        self.sample_random_configs(
                            batch_size - len(next_configs), history_container))
                return next_configs
            else:
                return self.sample_random_configs(batch_size,
                                                  history_container)

        if self.optimization_strategy == 'random':
            return self.sample_random_configs(batch_size, history_container)

        # Not enough successful trials to fit a surrogate; stay random.
        if num_config_successful < max(self.init_num, 1):
            self.logger.warning(
                'No enough successful initial trials! Sample random configurations.'
            )
            return self.sample_random_configs(batch_size, history_container)

        X = convert_configurations_to_array(history_container.configurations)
        Y = history_container.get_transformed_perfs(transform=None)
        # cY = history_container.get_transformed_constraint_perfs(transform='bilog')

        batch_configs_list = list()

        if self.batch_strategy == 'median_imputation':
            # set bilog_transform=False to get real cY for estimating median
            cY = history_container.get_transformed_constraint_perfs(
                transform=None)

            estimated_y = np.median(Y, axis=0).reshape(-1).tolist()
            estimated_c = np.median(
                cY, axis=0).tolist() if self.num_constraints > 0 else None
            # Work on a deep copy so fantasized observations never pollute
            # the real history.
            batch_history_container = copy.deepcopy(history_container)

            for batch_i in range(batch_size):
                # use super class get_suggestion
                curr_batch_config = super().get_suggestion(
                    batch_history_container)

                # imputation
                observation = Observation(config=curr_batch_config,
                                          objs=estimated_y,
                                          constraints=estimated_c,
                                          trial_state=SUCCESS,
                                          elapsed_time=None)
                batch_history_container.update_observation(observation)
                batch_configs_list.append(curr_batch_config)

        elif self.batch_strategy == 'local_penalization':
            # local_penalization only supports single objective with no constraint
            self.surrogate_model.train(X, Y)
            incumbent_value = history_container.get_incumbents()[0][1]
            # L = self.estimate_L(X)
            for i in range(batch_size):
                # With probability rand_prob, interleave a random pick.
                if self.rng.random() < self.rand_prob:
                    # sample random configuration proportionally
                    self.logger.info('Sample random config. rand_prob=%f.' %
                                     self.rand_prob)
                    cur_config = self.sample_random_configs(
                        1,
                        history_container,
                        excluded_configs=batch_configs_list)[0]
                else:
                    # Penalize around already-picked configs via batch_configs.
                    self.acquisition_function.update(
                        model=self.surrogate_model,
                        eta=incumbent_value,
                        num_data=len(history_container.data),
                        batch_configs=batch_configs_list)

                    challengers = self.optimizer.maximize(
                        runhistory=history_container,
                        num_points=5000,
                    )
                    cur_config = challengers.challengers[0]
                batch_configs_list.append(cur_config)
        elif self.batch_strategy == 'reoptimization':
            surrogate_trained = False
            for i in range(batch_size):
                if self.rng.random() < self.rand_prob:
                    # sample random configuration proportionally
                    self.logger.info('Sample random config. rand_prob=%f.' %
                                     self.rand_prob)
                    cur_config = self.sample_random_configs(
                        1,
                        history_container,
                        excluded_configs=batch_configs_list)[0]
                else:
                    if not surrogate_trained:
                        # set return_list=True to ensure surrogate trained
                        candidates = super().get_suggestion(history_container,
                                                            return_list=True)
                        surrogate_trained = True
                    else:
                        # re-optimize acquisition function
                        challengers = self.optimizer.maximize(
                            runhistory=history_container, num_points=5000)
                        candidates = challengers.challengers
                    # Take the first candidate not already picked or evaluated.
                    cur_config = None
                    for config in candidates:
                        if config not in batch_configs_list and config not in history_container.configurations:
                            cur_config = config
                            break
                    if cur_config is None:
                        self.logger.warning(
                            'Cannot get non duplicate configuration from BO candidates (len=%d). '
                            'Sample random config.' % (len(candidates), ))
                        cur_config = self.sample_random_configs(
                            1,
                            history_container,
                            excluded_configs=batch_configs_list)[0]
                batch_configs_list.append(cur_config)
        elif self.batch_strategy == 'default':
            # select first N candidates
            candidates = super().get_suggestion(history_container,
                                                return_list=True)
            idx = 0
            while len(batch_configs_list) < batch_size:
                if idx >= len(candidates):
                    # Candidate list exhausted: fall back to random sampling.
                    self.logger.warning(
                        'Cannot get non duplicate configuration from BO candidates (len=%d). '
                        'Sample random config.' % (len(candidates), ))
                    cur_config = self.sample_random_configs(
                        1,
                        history_container,
                        excluded_configs=batch_configs_list)[0]
                elif self.rng.random() < self.rand_prob:
                    # sample random configuration proportionally
                    self.logger.info('Sample random config. rand_prob=%f.' %
                                     self.rand_prob)
                    cur_config = self.sample_random_configs(
                        1,
                        history_container,
                        excluded_configs=batch_configs_list)[0]
                else:
                    # Scan forward for the next non-duplicate candidate;
                    # cur_config stays None if none is found this pass.
                    cur_config = None
                    while idx < len(candidates):
                        conf = candidates[idx]
                        idx += 1
                        if conf not in batch_configs_list and conf not in history_container.configurations:
                            cur_config = conf
                            break
                if cur_config is not None:
                    batch_configs_list.append(cur_config)

        else:
            raise ValueError('Invalid sampling strategy - %s.' %
                             self.batch_strategy)
        return batch_configs_list
Ejemplo n.º 14
0
    def get_suggestions(self, batch_size=None, history_container=None):
        """Suggest a batch of configurations to evaluate next.

        Falls back to the initial design / random sampling until enough
        trials have completed, then expands single-point BO suggestions
        into a batch using ``self.batch_strategy`` ('median_imputation'
        or 'local_penalization').
        """
        batch_size = self.batch_size if batch_size is None else batch_size
        history_container = (self.history_container
                             if history_container is None
                             else history_container)

        n_evaluated = len(history_container.configurations)
        n_successful = len(history_container.successful_perfs)

        # Still within the initial design phase.
        if n_evaluated < self.init_num:
            if self.initial_configurations is None:
                return self.sample_random_configs(self.init_num,
                                                  history_container)
            # self.init_num equals to len(self.initial_configurations)
            return self.initial_configurations

        if self.optimization_strategy == 'random':
            return self.sample_random_configs(batch_size, history_container)

        # Too few successful trials to fit a surrogate; stay random.
        if n_successful < max(self.init_num, 1):
            self.logger.warning(
                'No enough successful initial trials! Sample random configurations.'
            )
            return self.sample_random_configs(batch_size, history_container)

        X = convert_configurations_to_array(history_container.configurations)
        Y = history_container.get_transformed_perfs()
        # cY = history_container.get_transformed_constraint_perfs()

        suggestions = list()

        if self.batch_strategy == 'median_imputation':
            # bilog_transform=False yields the raw constraint values, which
            # is what we want when estimating the median.
            raw_cY = history_container.get_transformed_constraint_perfs(
                bilog_transform=False)

            median_y = np.median(Y, axis=0).reshape(-1).tolist()
            median_c = (np.median(raw_cY, axis=0).tolist()
                        if self.num_constraints > 0 else None)
            # Deep copy so fantasized outcomes never touch the real history.
            fantasy_history = copy.deepcopy(history_container)

            for _ in range(batch_size):
                # Single-point suggestion from the parent advisor.
                config = super().get_suggestion(fantasy_history)

                # Fantasize its outcome with the median objective/constraints.
                fake_obs = Observation(config, SUCCESS, median_c, median_y,
                                       None)
                fantasy_history.update_observation(fake_obs)
                suggestions.append(config)

        elif self.batch_strategy == 'local_penalization':
            # local_penalization only supports single objective with no constraint
            self.surrogate_model.train(X, Y)
            eta = history_container.get_incumbents()[0][1]
            # L = self.estimate_L(X)
            for _ in range(batch_size):
                # Penalize the acquisition around already-picked configs.
                self.acquisition_function.update(
                    model=self.surrogate_model,
                    eta=eta,
                    num_data=len(history_container.data),
                    batch_configs=suggestions)

                challengers = self.optimizer.maximize(
                    runhistory=history_container, num_points=5000)
                suggestions.append(challengers.challengers[0])
        else:
            raise ValueError('Invalid sampling strategy - %s.' %
                             self.batch_strategy)
        return suggestions
Ejemplo n.º 15
0
    def get_suggestions(self, n_suggestions):
        """Suggest ``n_suggestions`` configurations to evaluate next.

        Builds the training data from successful trials plus failed trials
        imputed with ``self.max_y`` (the worst observed value), then expands
        single picks into a batch via ``self.batch_strategy``.

        Parameters
        ----------
        n_suggestions : int
            Number of configurations to return.

        Returns
        -------
        list
            The suggested configurations.

        Raises
        ------
        ValueError
            If ``self.batch_strategy`` is unsupported.
        """
        if len(self.configurations) == 0:
            X = np.array([])
        else:
            # Failed configs are only usable once max_y (the imputation
            # value) is known, i.e. after at least one success.
            failed_configs = list(
            ) if self.max_y is None else self.failed_configurations.copy()
            X = convert_configurations_to_array(self.configurations +
                                                failed_configs)

        num_failed_trial = len(self.failed_configurations)
        failed_perfs = list() if self.max_y is None else [self.max_y
                                                          ] * num_failed_trial
        Y = np.array(self.perfs + failed_perfs, dtype=np.float64)

        num_config_evaluated = len(self.perfs)
        batch_configs_list = list()

        if num_config_evaluated < self.init_num or self.optimization_strategy == 'random':
            return self.sample_random_configs(n_suggestions)

        if self.batch_strategy == 'median_imputation':
            # Fantasize each pick's outcome with the median performance.
            estimated_y = np.median(Y)
            batch_history_container = copy.deepcopy(self.history_container)
            for i in range(n_suggestions):
                # snapshot=True only on the first fit; later fits reuse it.
                # NOTE(review): presumably `snapshot` controls caching in the
                # surrogate — confirm its semantics in the model class.
                self.surrogate_model.train(X, Y, snapshot=(i == 0))
                incumbent_value = batch_history_container.get_incumbents(
                )[0][1]
                self.acquisition_function.update(
                    model=self.surrogate_model,
                    eta=incumbent_value,
                    num_data=len(batch_history_container.data))

                challengers = self.optimizer.maximize(
                    runhistory=batch_history_container, num_points=5000)

                is_repeated_config = True
                repeated_time = 0
                curr_batch_config = None

                # NOTE(review): this loop relies on `add` raising ValueError
                # for duplicate configs; if every challenger is a duplicate,
                # indexing past the end raises an uncaught IndexError —
                # confirm the optimizer always yields enough fresh candidates.
                while is_repeated_config:
                    try:
                        curr_batch_config = challengers.challengers[
                            repeated_time]
                        batch_history_container.add(curr_batch_config,
                                                    estimated_y)
                    except ValueError:
                        is_repeated_config = True
                        repeated_time += 1
                    else:
                        is_repeated_config = False

                batch_configs_list.append(curr_batch_config)
                # Append the fantasized point so the next pick conditions on it.
                X = np.append(X,
                              convert_configurations_to_array(
                                  [curr_batch_config]),
                              axis=0)
                Y = np.append(Y, estimated_y)

        elif self.batch_strategy == 'local_penalization':
            self.surrogate_model.train(X, Y)
            incumbent_value = self.history_container.get_incumbents()[0][1]
            # L = self.estimate_L(X)
            for i in range(n_suggestions):
                # Penalize the acquisition around already-picked configs.
                self.acquisition_function.update(
                    model=self.surrogate_model,
                    eta=incumbent_value,
                    num_data=len(self.history_container.data),
                    batch_configs=batch_configs_list)

                challengers = self.optimizer.maximize(
                    runhistory=self.history_container, num_points=5000)
                batch_configs_list.append(challengers.challengers[0])
        else:
            raise ValueError('Invalid sampling strategy - %s.' %
                             self.batch_strategy)
        return batch_configs_list
Ejemplo n.º 16
0
    def iterate(self):
        """Run one optimization iteration: pick a batch and evaluate it.

        Builds training data (failed trials imputed with ``self.max_y``),
        asks ``choose_next`` for a batch, evaluates each new configuration
        under ``self.time_limit_per_trial``, and records the outcome in the
        per-trial state/perf/info lists as well as the optimizer's history.

        Returns
        -------
        tuple
            ``(config_list, trial_state_list, perf_list, trial_info_list)``
            with one entry per suggested configuration.
        """
        if len(self.configurations) == 0:
            X = np.array([])
        else:
            # Failed configs are imputed with max_y, so they are only usable
            # after at least one success has established max_y.
            failed_configs = list() if self.max_y is None else self.failed_configurations.copy()
            X = convert_configurations_to_array(self.configurations + failed_configs)

        failed_perfs = list() if self.max_y is None else [self.max_y] * len(self.failed_configurations)
        Y = np.array(self.perfs + failed_perfs, dtype=np.float64)

        config_list = self.choose_next(X, Y)
        trial_state_list = list()
        trial_info_list = list()
        perf_list = list()

        for i, config in enumerate(config_list):
            # Optimistic defaults; overwritten below for every path.
            trial_state_list.append(SUCCESS)
            trial_info_list.append(None)
            perf_list.append(None)

            if config not in (self.configurations + self.failed_configurations):
                # Evaluate this configuration.
                try:
                    args, kwargs = (config,), dict()
                    timeout_status, _result = time_limit(self.objective_function, self.time_limit_per_trial,
                                                         args=args, kwargs=kwargs)
                    if timeout_status:
                        raise TimeoutException(
                            'Timeout: time limit for this evaluation is %.1fs' % self.time_limit_per_trial)
                    else:
                        perf_list[i] = _result
                except Exception as e:
                    # TimeoutException is mapped to TIMEOUT; everything else
                    # (including objective-function errors) to FAILED.
                    if isinstance(e, TimeoutException):
                        trial_state_list[i] = TIMEOUT
                    else:
                        traceback.print_exc(file=sys.stdout)
                        trial_state_list[i] = FAILED
                    perf_list[i] = MAXINT
                    trial_info_list[i] = str(e)
                    self.logger.error(trial_info_list[i])

                if trial_state_list[i] == SUCCESS and perf_list[i] < MAXINT:
                    # First-ever success seeds the improvement baseline.
                    if len(self.configurations) == 0:
                        self.default_obj_value = perf_list[i]

                    self.configurations.append(config)
                    self.perfs.append(perf_list[i])
                    self.history_container.add(config, perf_list[i])

                    # Refresh running statistics over all observed perfs.
                    self.perc = np.percentile(self.perfs, self.scale_perc)
                    self.min_y = np.min(self.perfs)
                    self.max_y = np.max(self.perfs)
                else:
                    self.failed_configurations.append(config)
            else:
                # Already evaluated: reuse the recorded outcome.
                self.logger.debug('This configuration has been evaluated! Skip it.')
                if config in self.configurations:
                    config_idx = self.configurations.index(config)
                    trial_state_list[i], perf_list[i] = SUCCESS, self.perfs[config_idx]
                else:
                    trial_state_list[i], perf_list[i] = FAILED, MAXINT
ı
        self.iteration_id += 1
        # NOTE(review): if no trial has ever succeeded, default_obj_value
        # still holds whatever it was initialized to when used below —
        # confirm it is a numeric sentinel in __init__.
        self.logger.info(
            'Iteration-%d, objective improvement: %.4f' % (
                self.iteration_id, max(0, self.default_obj_value - min(perf_list))))
        return config_list, trial_state_list, perf_list, trial_info_list
Ejemplo n.º 17
0
    def update_weight(self):
        """Re-estimate the ensemble weights of the per-fidelity surrogates.

        The observations at the highest fidelity (``self.iterate_r[-1]``)
        serve as the validation set. With fewer than 3 observations the
        previous weights are kept unchanged. Two weighting schemes:

        * ``'rank_loss_p_norm'``: weight each surrogate by the (power-
          normalized) fraction of validation pairs whose order its
          predictions preserve; the top-fidelity surrogate is scored by
          5-fold cross validation on the validation set itself.
        * ``'rank_loss_prob'``: Monte-Carlo estimate (100 samples) of the
          probability that each surrogate best preserves the observed
          ranking, sampling predictions from their predictive distributions.

        The new weights are written into ``self.surrogate.surrogate_weight``,
        appended to ``self.hist_weights``, and persisted to disk under
        ``<data_directory>/saved_weights/``.

        Raises
        ------
        ValueError
            If ``self.weight_method`` is not a supported scheme.
        """
        start_time = time.time()

        max_r = self.iterate_r[-1]
        incumbent_configs = self.target_x[max_r]
        test_x = convert_configurations_to_array(incumbent_configs)
        test_y = np.array(self.target_y[max_r], dtype=np.float64)

        r_list = self.surrogate.surrogate_r
        K = len(r_list)

        if len(test_y) >= 3:
            if self.weight_method == 'rank_loss_p_norm':
                preserving_order_p = list()
                preserving_order_nums = list()
                for i, r in enumerate(r_list):
                    fold_num = 5
                    if i != K - 1:
                        # Lower-fidelity surrogates: score directly on the
                        # high-fidelity validation set.
                        mean, var = self.surrogate.surrogate_container[
                            r].predict(test_x)
                        tmp_y = np.reshape(mean, -1)
                        preorder_num, pair_num = self.calculate_preserving_order_num(
                            tmp_y, test_y)
                        preserving_order_p.append(preorder_num / pair_num)
                        preserving_order_nums.append(preorder_num)
                    else:
                        if len(test_y) < 2 * fold_num:
                            preserving_order_p.append(0)
                        else:
                            # 5-fold cross validation for the top-fidelity
                            # surrogate (it was trained on this very data).
                            kfold = KFold(n_splits=fold_num)
                            # BUGFIX: was np.array([0] * len(test_y)) — an
                            # int-dtype array that silently truncated the
                            # float predictions assigned below, corrupting
                            # the rank statistics.
                            cv_pred = np.zeros(len(test_y), dtype=np.float64)
                            for train_idx, valid_idx in kfold.split(test_x):
                                train_configs, train_y = test_x[
                                    train_idx], test_y[train_idx]
                                valid_configs, valid_y = test_x[
                                    valid_idx], test_y[valid_idx]
                                types, bounds = get_types(self.config_space)
                                _surrogate = RandomForestWithInstances(
                                    types=types, bounds=bounds)
                                _surrogate.train(train_configs, train_y)
                                pred, _ = _surrogate.predict(valid_configs)
                                cv_pred[valid_idx] = pred.reshape(-1)
                            preorder_num, pair_num = self.calculate_preserving_order_num(
                                cv_pred, test_y)
                            preserving_order_p.append(preorder_num / pair_num)
                            preserving_order_nums.append(preorder_num)

                # Power-normalize the order-preservation ratios into weights.
                trans_order_weight = np.array(preserving_order_p)
                power_sum = np.sum(np.power(trans_order_weight,
                                            self.power_num))
                new_weights = np.power(trans_order_weight,
                                       self.power_num) / power_sum

            elif self.weight_method == 'rank_loss_prob':
                # For basic surrogate i=1:K-1.
                mean_list, var_list = list(), list()
                for i, r in enumerate(r_list[:-1]):
                    mean, var = self.surrogate.surrogate_container[r].predict(
                        test_x)
                    mean_list.append(np.reshape(mean, -1))
                    var_list.append(np.reshape(var, -1))
                sample_num = 100
                min_probability_array = [0] * K
                for _ in range(sample_num):
                    order_preseving_nums = list()

                    # For basic surrogate i=1:K-1.
                    for idx in range(K - 1):
                        # NOTE(review): rng.normal takes the std dev as its
                        # scale; passing the predictive variance follows the
                        # original code — confirm whether sqrt(var) was
                        # intended.
                        sampled_y = self.rng.normal(mean_list[idx],
                                                    var_list[idx])
                        _num, _ = self.calculate_preserving_order_num(
                            sampled_y, test_y)
                        order_preseving_nums.append(_num)

                    fold_num = 5
                    # For basic surrogate i=K. cv
                    if len(test_y) < 2 * fold_num:
                        order_preseving_nums.append(0)
                    else:
                        # 5-fold cross validation.
                        kfold = KFold(n_splits=fold_num)
                        # BUGFIX: float-dtype buffer (was an int array that
                        # truncated the sampled predictions assigned below).
                        cv_pred = np.zeros(len(test_y), dtype=np.float64)
                        for train_idx, valid_idx in kfold.split(test_x):
                            train_configs, train_y = test_x[train_idx], test_y[
                                train_idx]
                            valid_configs, valid_y = test_x[valid_idx], test_y[
                                valid_idx]
                            types, bounds = get_types(self.config_space)
                            _surrogate = RandomForestWithInstances(
                                types=types, bounds=bounds)
                            _surrogate.train(train_configs, train_y)
                            _pred, _var = _surrogate.predict(valid_configs)
                            sampled_pred = self.rng.normal(
                                _pred.reshape(-1), _var.reshape(-1))
                            cv_pred[valid_idx] = sampled_pred
                        _num, _ = self.calculate_preserving_order_num(
                            cv_pred, test_y)
                        order_preseving_nums.append(_num)
                    # Credit the surrogate that preserved the most pairs.
                    max_id = np.argmax(order_preseving_nums)
                    min_probability_array[max_id] += 1
                new_weights = np.array(min_probability_array) / sample_num
            else:
                raise ValueError('Invalid weight method: %s!' %
                                 self.weight_method)
        else:
            # Too little validation data: keep the previous weights.
            old_weights = list()
            for i, r in enumerate(r_list):
                _weight = self.surrogate.surrogate_weight[r]
                old_weights.append(_weight)
            new_weights = old_weights.copy()

        self.logger.info(
            '[%s] %d-th Updating weights: %s' %
            (self.weight_method, self.weight_changed_cnt, str(new_weights)))

        # Assign the weight to each basic surrogate.
        for i, r in enumerate(r_list):
            self.surrogate.surrogate_weight[r] = new_weights[i]
        self.weight_changed_cnt += 1
        # Save the weight data.
        self.hist_weights.append(new_weights)
        dir_path = os.path.join(self.data_directory, 'saved_weights')
        file_name = 'mfes_weights_%s.npy' % (self.method_name, )
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
        np.save(os.path.join(dir_path, file_name),
                np.asarray(self.hist_weights))
        self.logger.info(
            'update_weight() cost %.2fs. new weights are saved to %s' %
            (time.time() - start_time, os.path.join(dir_path, file_name)))