Example #1
    def __init__(self, config_space, objective_func, R,
                 num_iter=10, n_workers=1, eta=3, es_gap=9, rho=0.7,
                 random_state=1, method_id="Default"):
        BaseFacade.__init__(self, objective_func, n_workers=n_workers, need_lc=True, method_name=method_id)
        self.seed = random_state
        self.config_space = config_space
        self.config_space.seed(self.seed)
        self.R = R
        self.num_iter = num_iter
        self.eta = eta
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(R))
        self.inner_iteration_n = (self.s_max + 1) * (self.s_max + 1)

        types, bounds = get_types(config_space)
        self.num_config = len(bounds)
        self.surrogate = RandomForestWithInstances(types=types, bounds=bounds)
        self.acquisition_func = EI(model=self.surrogate)
        # TODO: add SMAC's optimization algorithm.
        self.acq_optimizer = RandomSampling(self.acquisition_func, config_space,
                                            n_samples=max(500, 50 * self.num_config))

        self.incumbent_configs = []
        self.incumbent_obj = []

        self.lcnet_model = LC_ES()
        self.early_stop_gap = es_gap
        self.es_rho = rho
        self.lc_training_x = None
        self.lc_training_y = None
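The bracket arithmetic above is easiest to check with concrete numbers. A minimal standalone sketch (R = 27 and eta = 3 are assumed values), with a small epsilon guard since int(log(R) / log(eta)) can truncate under floating-point error:

from math import log

R, eta = 27, 3
# floor(log_eta(R)); the epsilon guards against float truncation, e.g. 2.9999...
s_max = int(log(R) / log(eta) + 1e-9)          # 3
inner_iteration_n = (s_max + 1) * (s_max + 1)  # 16 inner iterations per run
print(s_max, inner_iteration_n)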
Example #2
def mini_smac(learn_delta):
    sample_num_m = s_mid
    sample_num_l = s_min
    if not learn_delta:
        sample_num_m = s_min

    start_time = time.time()
    config_space = create_configspace()
    types, bounds = get_types(config_space)
    num_hp = len(bounds)
    surrogate = RandomForestWithInstances(types=types, bounds=bounds)
    acquisition_func = EI(model=surrogate)
    acq_optimizer = RandomSampling(acquisition_func, config_space, n_samples=max(500, 50 * num_hp))
    X = []
    y = []
    y_delta = []
    c = []
    inc_y = 1.

    # Initial design: evaluate num_init randomly sampled configurations.
    init_configs = sample_configurations(config_space, num_init)
    for config in init_configs:
        perf_t, _ = objective_function((config.get_dictionary(), sample_num_m))
        X.append(config)
        y.append(perf_t)
        if perf_t < inc_y:
            inc_y = perf_t
        c.append([time.time() - start_time, inc_y])
        if learn_delta:
            perf_l, _ = objective_function((config.get_dictionary(), sample_num_l))
            y_delta.append(perf_t - perf_l)
        else:
            y_delta.append(perf_t)

    # BO iterations.
    for _ in range(num_iter - num_init):
        # Update the surrogate model.
        surrogate.train(convert_configurations_to_array(X), np.array(y, dtype=np.float64))

        # Use EI acq to choose next config.
        incumbent = dict()
        best_index = np.argmin(y)
        incumbent['obj'] = y[best_index]
        incumbent['config'] = X[best_index]
        acquisition_func.update(model=surrogate, eta=incumbent)
        next_config = acq_optimizer.maximize(batch_size=1)[0]
        perf_t, _ = objective_function((next_config.get_dictionary(), sample_num_m))
        X.append(next_config)
        y.append(perf_t)
        if perf_t < inc_y:
            inc_y = perf_t
        c.append([time.time() - start_time, inc_y])
        if learn_delta:
            perf_l, _ = objective_function((next_config.get_dictionary(), sample_num_l))
            y_delta.append(perf_t - perf_l)
        else:
            y_delta.append(perf_t)

    return [convert_configurations_to_array(X), np.array(y_delta, dtype=np.float64)]
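The y_delta target above is the gap between the medium- and low-fidelity evaluations of the same configuration. A toy numpy illustration of how that target is assembled (the quadratic objective and fidelity penalty here are invented for the sketch):

import numpy as np

def toy_objective(x, n_samples):
    # Hypothetical objective: the loss shrinks as the sample budget grows.
    return (x - 0.3) ** 2 + 1.0 / n_samples

xs = np.random.RandomState(1).rand(5)
perf_mid = np.array([toy_objective(x, 100) for x in xs])
perf_low = np.array([toy_objective(x, 10) for x in xs])
y_delta = perf_mid - perf_low   # the target the delta surrogate regresses on
print(y_delta)                  # constant -0.09 here; real gaps vary per config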
Example #3
    def __init__(self,
                 config_space,
                 objective_func,
                 R,
                 num_iter=10,
                 eta=3,
                 n_workers=1):
        BaseFacade.__init__(self, objective_func, n_workers=n_workers)
        self.config_space = config_space
        self.R = R
        self.eta = eta
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(self.R))
        self.B = (self.s_max + 1) * self.R
        self.num_iter = num_iter

        types, bounds = get_types(config_space)
        self.num_config = len(bounds)

        # Define the multi-fidelity ensemble surrogate.
        init_weight = [0.]
        init_weight.extend([1 / self.s_max] * self.s_max)
        self.weighted_surrogate = WeightedRandomForestCluster(
            types, bounds, self.s_max, self.eta, init_weight, 'gpoe')
        self.weighted_acquisition_func = EI(model=self.weighted_surrogate)
        self.weighted_acq_optimizer = RandomSampling(
            self.weighted_acquisition_func,
            config_space,
            n_samples=max(1000, 50 * self.num_config))

        self.incumbent_configs = []
        self.incumbent_obj = []
        self.iterate_id = 0
        self.iterate_r = []

        # Store the multi-fidelity evaluation data: D_1, ..., D_K.
        self.target_x = dict()
        self.target_y = dict()
        for item in np.logspace(0, self.s_max, self.s_max + 1, base=self.eta):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = []
            self.target_y[r] = []
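The np.logspace call above generates the geometric ladder of resource levels r at which the evaluation data D_1, ..., D_K are collected. A quick standalone check with assumed R = 27, eta = 3:

import numpy as np

eta, s_max = 3, 3   # s_max = int(log_eta(27))
rungs = [int(item) for item in np.logspace(0, s_max, s_max + 1, base=eta)]
print(rungs)        # [1, 3, 9, 27]: the keys of target_x / target_y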
Example #4
    def __init__(self,
                 config_space: ConfigurationSpace,
                 objective_func,
                 R,
                 num_iter=10000,
                 eta=3,
                 p=0.3,
                 n_workers=1,
                 random_state=1,
                 method_id='Default'):
        BaseFacade.__init__(self,
                            objective_func,
                            n_workers=n_workers,
                            method_name=method_id)
        self.config_space = config_space
        self.seed = random_state
        self.config_space.seed(self.seed)
        self.p = p
        self.R = R
        self.eta = eta
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(self.R))
        self.B = (self.s_max + 1) * self.R
        self.num_iter = num_iter

        types, bounds = get_types(config_space)
        self.num_config = len(bounds)
        self.surrogate = RandomForestWithInstances(types=types, bounds=bounds)
        self.acquisition_func = EI(model=self.surrogate)
        self.acq_optimizer = RandomSampling(self.acquisition_func,
                                            config_space,
                                            n_samples=max(
                                                500, 50 * self.num_config))

        self.incumbent_configs = []
        self.incumbent_obj = []
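The p parameter above is not used in this snippet, but by analogy with the SMAC facade in Example #8 it presumably drives an epsilon-greedy mix of random and acquisition-chosen configurations. A minimal sketch of that pattern (all names here are illustrative):

import random

def eps_greedy_pick(acq_suggestion, random_sampler, p=0.3):
    # With probability p, fall back to a random configuration.
    return random_sampler() if random.random() < p else acq_suggestion

picks = [eps_greedy_pick('acq', lambda: 'rand', p=0.3) for _ in range(10)]
print(picks)   # roughly 3 of 10 entries are 'rand' on average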
Example #5
class HOIST(BaseFacade):
    def __init__(self,
                 config_space,
                 objective_func,
                 R,
                 num_iter=10,
                 eta=3,
                 n_workers=1):
        BaseFacade.__init__(self, objective_func, n_workers=n_workers)
        self.config_space = config_space
        self.R = R
        self.eta = eta
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(self.R))
        self.B = (self.s_max + 1) * self.R
        self.num_iter = num_iter

        types, bounds = get_types(config_space)
        self.num_config = len(bounds)

        # Define the multi-fidelity ensemble surrogate.
        init_weight = [0.]
        init_weight.extend([1 / self.s_max] * self.s_max)
        self.weighted_surrogate = WeightedRandomForestCluster(
            types, bounds, self.s_max, self.eta, init_weight, 'gpoe')
        self.weighted_acquisition_func = EI(model=self.weighted_surrogate)
        self.weighted_acq_optimizer = RandomSampling(
            self.weighted_acquisition_func,
            config_space,
            n_samples=max(1000, 50 * self.num_config))

        self.incumbent_configs = []
        self.incumbent_obj = []
        self.iterate_id = 0
        self.iterate_r = []

        # Store the multi-fidelity evaluation data: D_1, ..., D_K.
        self.target_x = dict()
        self.target_y = dict()
        for item in np.logspace(0, self.s_max, self.s_max + 1, base=self.eta):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = []
            self.target_y[r] = []

    @BaseFacade.process_manage
    def run(self):
        try:
            for iter_id in range(1, 1 + self.num_iter):
                self.logger.info('-' * 50)
                self.logger.info("HOIST algorithm: %d/%d iteration starts." %
                                 (iter_id, self.num_iter))
                start_time = time.time()
                self.iterate()
                time_elapsed = (time.time() - start_time) / 60
                self.logger.info("iteration took %.2f min." % time_elapsed)
                self.iterate_id += 1
                self.save_intemediate_statistics()
        except Exception as e:
            self.logger.error(str(e))
            # Clear the intermediate results.
            self.remove_immediate_model()

    def iterate(self, skip_last=0):

        for s in reversed(range(self.s_max + 1)):
            # Initial number of configurations
            n = int(ceil(self.B / self.R / (s + 1) * self.eta**s))
            # Initial number of iterations per config
            r = int(self.R * self.eta**(-s))

            # Choose a batch of configurations in different mechanisms.
            start_time = time.time()
            T = self.choose_next(n)
            time_elapsed = time.time() - start_time
            self.logger.info("Choosing next configurations took %.2f sec." %
                             time_elapsed)

            extra_info = None
            last_run_num = None

            for i in range((s + 1) - int(skip_last)):  # Changed from s + 1

                # Run each of the n configs for <iterations> and keep best (n_configs / eta) configurations
                n_configs = n * self.eta**(-i)
                n_iterations = r * self.eta**(i)

                n_iter = n_iterations
                if last_run_num is not None and not self.restart_needed:
                    n_iter -= last_run_num
                last_run_num = n_iterations

                self.logger.info(
                    "HOIST: %d configurations with %d units of resource" %
                    (int(n_configs), int(n_iterations)))

                ret_val, early_stops = self.run_in_parallel(
                    T, n_iter, extra_info)
                val_losses = [item['loss'] for item in ret_val]
                ref_list = [item['ref_id'] for item in ret_val]

                self.target_x[int(n_iterations)].extend(T)
                self.target_y[int(n_iterations)].extend(val_losses)

                if int(n_iterations) == self.R:
                    self.incumbent_configs.extend(T)
                    self.incumbent_obj.extend(val_losses)

                # Select a number of well-performed configurations for the next loop.
                indices = np.argsort(val_losses)
                if len(T) == sum(early_stops):
                    break
                if len(T) >= self.eta:
                    T = [T[i] for i in indices if not early_stops[i]]
                    extra_info = [
                        ref_list[i] for i in indices if not early_stops[i]
                    ]
                    reduced_num = int(n_configs / self.eta)
                    T = T[0:reduced_num]
                    extra_info = extra_info[0:reduced_num]
                else:
                    T = [T[indices[0]]]
                    extra_info = [ref_list[indices[0]]]
                incumbent_loss = val_losses[indices[0]]
                self.add_stage_history(
                    self.stage_id, min(self.global_incumbent, incumbent_loss))
                self.stage_id += 1
            self.remove_immediate_model()

            # Augment the intermediate evaluation data.
            for item in self.iterate_r[self.iterate_r.index(r):]:
                # objective value normalization: min-max linear normalization
                normalized_y = minmax_normalization(self.target_y[item])
                self.weighted_surrogate.train(convert_configurations_to_array(
                    self.target_x[item]),
                                              np.array(normalized_y,
                                                       dtype=np.float64),
                                              r=item)
            # Update the parameter in the ensemble model.
            if len(self.target_y[self.iterate_r[-1]]) >= 2:
                self.update_weight()

    def choose_next(self, num_config):
        if len(self.target_y[self.iterate_r[-1]]) == 0:
            return sample_configurations(self.config_space, num_config)

        conf_cnt = 0
        next_configs = []
        total_cnt = 0

        incumbent = dict()
        max_r = self.iterate_r[-1]
        best_index = np.argmin(self.target_y[max_r])
        incumbent['config'] = self.target_x[max_r][best_index]
        approximate_obj = self.weighted_surrogate.predict(
            convert_configurations_to_array([incumbent['config']]))[0]
        incumbent['obj'] = approximate_obj
        self.weighted_acquisition_func.update(model=self.weighted_surrogate,
                                              eta=incumbent)

        while conf_cnt < num_config and total_cnt < 2 * num_config:
            rand_config = self.weighted_acq_optimizer.maximize(batch_size=1)[0]
            if rand_config not in next_configs:
                next_configs.append(rand_config)
                conf_cnt += 1
            total_cnt += 1
        if conf_cnt < num_config:
            next_configs = expand_configurations(next_configs,
                                                 self.config_space, num_config)
        return next_configs

    def update_weight(self):
        max_r = self.iterate_r[-1]
        r_list = self.iterate_r

        incumbent_configs = self.target_x[max_r]
        test_x = convert_configurations_to_array(incumbent_configs)
        test_y = minmax_normalization(self.target_y[max_r])

        predictions = []
        for i, r in enumerate(r_list[:-1]):
            mean, _ = self.weighted_surrogate.surrogate_container[r].predict(
                test_x)
            predictions.append(mean.flatten().tolist())
        predictions.append(
            self.obtain_cv_prediction(test_x, np.array(test_y,
                                                       dtype=np.float64)))
        solution, status = self.solve_optpro(
            np.mat(predictions).T,
            np.mat(test_y).T)
        if status:
            solution[solution < 1e-3] = 0.
            self.logger.info('New weight: %s' % str(solution))
            for i, r in enumerate(r_list):
                self.weighted_surrogate.surrogate_weight[r] = solution[i]

    def obtain_cv_prediction(self, X, y):
        types, bounds = get_types(self.config_space)
        base_model = RandomForestWithInstances(types=types, bounds=bounds)
        instance_num = len(y)
        output_pred = []
        if instance_num < 10:
            for i in range(instance_num):
                row_indexs = list(range(instance_num))
                del row_indexs[i]
                base_model.train(X[row_indexs], y[row_indexs])
                mu, _ = base_model.predict(X)
                output_pred.append(mu[i, 0])
        else:
            # Conduct 5-fold cross validation.
            K = 5
            fold_num = instance_num // K
            for i in range(K):
                row_indexs = list(range(instance_num))
                bound = (instance_num - i * fold_num) if i == K - 1 else fold_num
                for _ in range(bound):
                    del row_indexs[i * fold_num]
                base_model.train(X[row_indexs, :], y[row_indexs])
                mu, _ = base_model.predict(X)
                start = i * fold_num
                end = start + bound
                output_pred.extend(mu[start:end, 0].tolist())
        assert len(output_pred) == instance_num
        return output_pred

    def solve_optpro(self, pred_y, true_y, debug=False):

        # The optimization function.
        def Loss_func(true_y, pred_y):
            # Compute the pairwise logistic rank loss.
            true_y = np.array(true_y)[:, 0]
            pred_y = np.array(pred_y)[:, 0]
            comb = itertools.combinations(range(true_y.shape[0]), 2)
            pairs = list()
            # Compute the pairs.
            for _, (i, j) in enumerate(comb):
                if true_y[i] > true_y[j]:
                    pairs.append((i, j))
                elif true_y[i] < true_y[j]:
                    pairs.append((j, i))
            loss = 0.
            pair_num = len(pairs)
            if pair_num == 0:
                return 0.
            for (i, j) in pairs:
                loss += np.log(1 + np.exp(pred_y[j] - pred_y[i]))
            return loss

        # The derivative function.
        def Loss_der(true_y, A, x):
            y_pred = A * np.mat(x).T
            true_y = np.array(true_y)[:, 0]
            pred_y = np.array(y_pred)[:, 0]

            comb = itertools.combinations(range(true_y.shape[0]), 2)
            pairs = list()
            # Compute the pairs.
            for _, (i, j) in enumerate(comb):
                if true_y[i] > true_y[j]:
                    pairs.append((i, j))
                elif true_y[i] < true_y[j]:
                    pairs.append((j, i))
            # Calculate the derivatives.
            grad = np.zeros(A.shape[1])
            pair_num = len(pairs)
            if pair_num == 0:
                return grad
            for (i, j) in pairs:
                e_z = np.exp(pred_y[j] - pred_y[i])
                grad += e_z / (1 + e_z) * (A[j] - A[i]).A1
            return grad

        A, b = pred_y, true_y
        n, m = A.shape
        # Add constraints.
        ineq_cons = {
            'type': 'ineq',
            'fun': lambda x: np.array(x),
            'jac': lambda x: np.eye(len(x))
        }
        eq_cons = {
            'type': 'eq',
            'fun': lambda x: np.array([sum(x) - 1]),
            'jac': lambda x: np.array([1.] * len(x))
        }

        x0 = np.array([1. / m] * m)

        def f(x):
            w = np.mat(x).T
            return Loss_func(b, A * w)

        def f_der(x):
            return Loss_der(b, A, x)

        res = minimize(f,
                       x0,
                       method='SLSQP',
                       jac=f_der,
                       constraints=[eq_cons, ineq_cons],
                       options={
                           'ftol': 1e-8,
                           'disp': False
                       })

        status = False if np.isnan(res.x).any() else True
        if not res.success and status:
            res.x[res.x < 0.] = 0.
            res.x[res.x > 1.] = 1.
            if sum(res.x) > 1.5:
                status = False
        if debug:
            print('the objective', f(res.x))
        return res.x, status

    def get_incumbent(self, num_inc=1):
        assert (len(self.incumbent_obj) == len(self.incumbent_configs))
        indices = np.argsort(self.incumbent_obj)
        return [self.incumbent_configs[i] for i in indices[0:num_inc]], \
               [self.incumbent_obj[i] for i in indices[0: num_inc]]
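solve_optpro above fits the ensemble weights by minimizing a pairwise logistic ranking loss under a simplex constraint. A standalone check of that loss in pure numpy, without the SLSQP optimizer:

import itertools
import numpy as np

def rank_loss(true_y, pred_y):
    # Penalize log(1 + exp(pred[j] - pred[i])) for every pair with
    # true_y[i] > true_y[j]; small when predictions preserve the ordering.
    loss = 0.
    for i, j in itertools.combinations(range(len(true_y)), 2):
        if true_y[i] == true_y[j]:
            continue
        hi, lo = (i, j) if true_y[i] > true_y[j] else (j, i)
        loss += np.log(1 + np.exp(pred_y[lo] - pred_y[hi]))
    return loss

print(rank_loss([3., 1., 2.], [3., 1., 2.]))   # ~0.75: ordering preserved
print(rank_loss([3., 1., 2.], [1., 3., 2.]))   # ~4.75: ordering violated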
Example #6
    def __init__(self,
                 config_space,
                 objective_func,
                 R,
                 num_iter=10,
                 eta=3,
                 p=0.5,
                 n_workers=1,
                 info_type='Weighted',
                 rho_delta=0.1,
                 init_weight=None,
                 update_enable=False,
                 random_mode=True,
                 enable_rho=True,
                 scale_method=1,
                 init_rho=0.8):
        BaseFacade.__init__(self, objective_func, n_workers=n_workers)
        self.config_space = config_space
        self.p = p
        self.R = R
        self.eta = eta
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(self.R))
        self.B = (self.s_max + 1) * self.R
        self.num_iter = num_iter
        self.info_type = info_type
        self.update_enable = update_enable
        self.random_mode = random_mode

        self.enable_rho = enable_rho
        self.rho = init_rho
        self.rho_delta = rho_delta
        self.min_rho = 0.5
        self.scale_method = scale_method
        self.weight_update_id = 0

        if init_weight is None:
            init_weight = [1. / (self.s_max + 1)] * (self.s_max + 1)
        self.logger.info("initial confidence weight %s" %
                         init_weight[:self.s_max + 1])
        types, bounds = get_types(config_space)
        self.num_config = len(bounds)
        self.surrogate = RandomForestWithInstances(types=types, bounds=bounds)
        self.acquisition_func = EI(model=self.surrogate)
        self.acq_optimizer = RandomSampling(self.acquisition_func,
                                            config_space,
                                            n_samples=max(
                                                500, 50 * self.num_config))

        if info_type == 'Weighted':
            self.weighted_surrogate = WeightedRandomForestCluster(
                types, bounds, self.s_max, self.eta, init_weight, 'lc')
            self.weighted_acquisition_func = EI(model=self.weighted_surrogate)
            self.weighted_acq_optimizer = RandomSampling(
                self.weighted_acquisition_func,
                config_space,
                n_samples=max(500, 50 * self.num_config))

        self.incumbent_configs = []
        self.incumbent_obj = []
        self.init_tradeoff = 0.5
        self.tradeoff_dec_rate = 0.8
        self.iterate_id = 0
        self.iterate_r = []
        self.hist_weights = list()

        self.target_x = dict()
        self.target_y = dict()
        for item in np.logspace(0, self.s_max, self.s_max + 1, base=self.eta):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = []
            self.target_y[r] = []
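init_rho, rho_delta, and min_rho above implement a decaying confidence coefficient (see update_rho in Example #7): rho steps down by rho_delta per iteration and clamps at min_rho. A quick trace with these defaults:

rho, rho_delta, min_rho = 0.8, 0.1, 0.5   # defaults from the __init__ above
trace = [rho]
for _ in range(5):
    if rho > min_rho:
        rho = max(rho - rho_delta, min_rho)
    trace.append(round(rho, 2))
print(trace)   # [0.8, 0.7, 0.6, 0.5, 0.5, 0.5]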
Example #7
class XFHB(BaseFacade):
    def __init__(self,
                 config_space,
                 objective_func,
                 R,
                 num_iter=10,
                 eta=3,
                 p=0.5,
                 n_workers=1,
                 info_type='Weighted',
                 rho_delta=0.1,
                 init_weight=None,
                 update_enable=False,
                 random_mode=True,
                 enable_rho=True,
                 scale_method=1,
                 init_rho=0.8):
        BaseFacade.__init__(self, objective_func, n_workers=n_workers)
        self.config_space = config_space
        self.p = p
        self.R = R
        self.eta = eta
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(self.R))
        self.B = (self.s_max + 1) * self.R
        self.num_iter = num_iter
        self.info_type = info_type
        self.update_enable = update_enable
        self.random_mode = random_mode

        self.enable_rho = enable_rho
        self.rho = init_rho
        self.rho_delta = rho_delta
        self.min_rho = 0.5
        self.scale_method = scale_method
        self.weight_update_id = 0

        if init_weight is None:
            init_weight = [1. / (self.s_max + 1)] * (self.s_max + 1)
        self.logger.info("initial confidence weight %s" %
                         init_weight[:self.s_max + 1])
        types, bounds = get_types(config_space)
        self.num_config = len(bounds)
        self.surrogate = RandomForestWithInstances(types=types, bounds=bounds)
        self.acquisition_func = EI(model=self.surrogate)
        self.acq_optimizer = RandomSampling(self.acquisition_func,
                                            config_space,
                                            n_samples=max(
                                                500, 50 * self.num_config))

        if info_type == 'Weighted':
            self.weighted_surrogate = WeightedRandomForestCluster(
                types, bounds, self.s_max, self.eta, init_weight, 'lc')
            self.weighted_acquisition_func = EI(model=self.weighted_surrogate)
            self.weighted_acq_optimizer = RandomSampling(
                self.weighted_acquisition_func,
                config_space,
                n_samples=max(500, 50 * self.num_config))

        self.incumbent_configs = []
        self.incumbent_obj = []
        self.init_tradeoff = 0.5
        self.tradeoff_dec_rate = 0.8
        self.iterate_id = 0
        self.iterate_r = []
        self.hist_weights = list()

        self.target_x = dict()
        self.target_y = dict()
        for item in np.logspace(0, self.s_max, self.s_max + 1, base=self.eta):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = []
            self.target_y[r] = []

    def iterate(self, skip_last=0):

        for s in reversed(range(self.s_max + 1)):

            if self.update_enable and self.weight_update_id > self.s_max:
                self.update_weight_vector()
            self.weight_update_id += 1

            # initial number of configurations
            n = int(ceil(self.B / self.R / (s + 1) * self.eta**s))
            # initial number of iterations per config
            r = int(self.R * self.eta**(-s))

            # choose a batch of configurations in different mechanisms.
            start_time = time.time()
            if self.info_type != 'Weighted':
                T = self.choose_next(n, r, self.info_type)
            else:
                T = self.choose_next_weighted(n)
            time_elapsed = time.time() - start_time
            self.logger.info("choosing next configurations took %.2f sec." %
                             time_elapsed)

            extra_info = None
            last_run_num = None

            for i in range((s + 1) - int(skip_last)):  # changed from s + 1

                # Run each of the n configs for <iterations>
                # and keep best (n_configs / eta) configurations

                n_configs = n * self.eta**(-i)
                n_iterations = r * self.eta**(i)

                n_iter = n_iterations
                if last_run_num is not None and not self.restart_needed:
                    n_iter -= last_run_num
                last_run_num = n_iterations

                self.logger.info(
                    "XFHB-%s: %d configurations x %d iterations each" %
                    (self.info_type, int(n_configs), int(n_iterations)))

                ret_val, early_stops = self.run_in_parallel(
                    T, n_iter, extra_info)
                val_losses = [item['loss'] for item in ret_val]
                ref_list = [item['ref_id'] for item in ret_val]

                self.target_x[int(n_iterations)].extend(T)
                self.target_y[int(n_iterations)].extend(val_losses)

                if int(n_iterations) == self.R:
                    self.incumbent_configs.extend(T)
                    self.incumbent_obj.extend(val_losses)
                # select a number of best configurations for the next loop
                # filter out early stops, if any
                indices = np.argsort(val_losses)
                if len(T) == sum(early_stops):
                    break
                if len(T) >= self.eta:
                    T = [T[i] for i in indices if not early_stops[i]]
                    extra_info = [
                        ref_list[i] for i in indices if not early_stops[i]
                    ]
                    reduced_num = int(n_configs / self.eta)
                    T = T[0:reduced_num]
                    extra_info = extra_info[0:reduced_num]
                else:
                    T = [T[indices[0]]]
                    extra_info = [ref_list[indices[0]]]
                incumbent_loss = val_losses[indices[0]]
                self.add_stage_history(
                    self.stage_id, min(self.global_incumbent, incumbent_loss))
                self.stage_id += 1
            self.remove_immediate_model()

            if self.info_type == 'Weighted':
                for item in self.iterate_r[self.iterate_r.index(r):]:
                    # objective value normalization: min-max linear normalization
                    normalized_y = minmax_normalization(self.target_y[item])
                    self.weighted_surrogate.train(
                        convert_configurations_to_array(self.target_x[item]),
                        np.array(normalized_y, dtype=np.float64),
                        r=item)
        # TODO: decay the trade-off value (does Bayesian optimization already balance exploration & exploitation, or must we trade off again here?)
        self.init_tradeoff *= self.tradeoff_dec_rate

    def update_rho(self):
        if self.rho > self.min_rho:
            if self.rho - self.rho_delta < self.min_rho:
                self.rho = self.min_rho
            else:
                self.rho -= self.rho_delta

    @BaseFacade.process_manage
    def run(self):
        try:
            for iter_id in range(1, 1 + self.num_iter):
                self.logger.info('-' * 50)
                self.logger.info("XFHB algorithm: %d/%d iteration starts" %
                                 (iter_id, self.num_iter))
                start_time = time.time()
                self.iterate()
                time_elapsed = (time.time() - start_time) / 60
                self.logger.info("iteration took %.2f min." % time_elapsed)
                self.iterate_id += 1
                if self.enable_rho:
                    self.update_rho()
                self.save_intemediate_statistics()
        except Exception as e:
            self.logger.error(str(e))
            # Clear the intermediate results.
            self.remove_immediate_model()

    # TODO: how to utilize the final incumbents' data.
    # TODO: forward/backward target distribution utilization.
    def choose_next(self, num_config, r, mode):
        # different types of mode.
        if mode == 'Hybrid':
            mode = 'Backward' if self.iterate_id % 2 == 0 else 'Forward'

        if mode == 'Forward':
            if r != self.R:
                r *= self.eta
        elif mode == 'Backward':
            if r != 1:
                r /= self.eta
            else:
                r = self.R

        # TODO: with different modes, this condition may not be needed anymore.
        n_exp = len(self.target_y[r])
        if n_exp < 2 * self.num_config:
            return sample_configurations(self.config_space, num_config)

        self.logger.info('train feature is: %s' % str(self.target_x[r]))
        self.logger.info('train target is: %s' % str(self.target_y[r]))

        self.surrogate.train(convert_configurations_to_array(self.target_x[r]),
                             np.array(self.target_y[r], dtype=np.float64))

        conf_cnt = 0
        next_configs = []
        total_cnt = 0
        # TODO: acceleration, maximize a batch of candidates.
        while conf_cnt < num_config and total_cnt < 5 * num_config:
            rand_config = None
            if random.uniform(0, 1) < self.init_tradeoff:
                rand_config = self.config_space.sample_configuration(1)
            else:
                # print('use surrogate to produce candidate.')
                incumbent = dict()
                incumbent['obj'] = np.min(self.target_y[r])
                incumbent['config'] = self.target_x[r][np.argmin(
                    self.target_y[r])]

                self.acquisition_func.update(model=self.surrogate,
                                             eta=incumbent)
                rand_config = self.acq_optimizer.maximize(batch_size=1)[0]
            if rand_config not in next_configs:
                next_configs.append(rand_config)
                conf_cnt += 1
            total_cnt += 1

        if conf_cnt < num_config:
            next_configs = expand_configurations(next_configs,
                                                 self.config_space, num_config)

        return next_configs

    def choose_next_weighted(self, num_config):
        if len(self.target_y[self.iterate_r[-1]]) == 0:
            return sample_configurations(self.config_space, num_config)

        conf_cnt = 0
        next_configs = []
        total_cnt = 0

        while conf_cnt < num_config and total_cnt < 2 * num_config:
            # in Bayesian optimization, eliminate epsilon sampling.
            incumbent = dict()
            # TODO: problem -- use the best configuration at the maximal resource.
            # TODO: SMAC's optimization algorithm.
            max_r = self.iterate_r[-1]
            best_index = np.argmin(self.target_y[max_r])
            incumbent['config'] = self.target_x[max_r][best_index]
            approximate_obj = self.weighted_surrogate.predict(
                convert_configurations_to_array([incumbent['config']]))[0]
            incumbent['obj'] = approximate_obj

            self.weighted_acquisition_func.update(
                model=self.weighted_surrogate, eta=incumbent)
            rand_config = self.weighted_acq_optimizer.maximize(batch_size=1)[0]

            if rand_config not in next_configs:
                next_configs.append(rand_config)
                conf_cnt += 1
            total_cnt += 1

        if conf_cnt < num_config:
            next_configs = expand_configurations(next_configs,
                                                 self.config_space, num_config)
        return next_configs

    def update_weight_vector(self):
        rho = self.rho
        max_r = self.iterate_r[-1]
        incumbent_configs = self.target_x[max_r]
        test_x = convert_configurations_to_array(incumbent_configs)
        test_y = minmax_normalization(self.target_y[max_r])

        r_list = self.weighted_surrogate.surrogate_r
        cur_confidence = self.weighted_surrogate.surrogate_weight
        curr_list = [cur_confidence[r] for r in r_list[:-1]]

        # calculate correlation coefficient.
        corrcoef_list = []
        for i, r in enumerate(r_list):
            mean, _ = self.weighted_surrogate.surrogate_container[r].predict(
                test_x)
            tmp_y = np.reshape(mean, -1)
            # corrcoef = np.corrcoef(np.vstack((test_y, tmp_y)))[0][1]/2 + 0.5
            corrcoef = max(0, np.corrcoef(np.vstack((test_y, tmp_y)))[0][1])
            corrcoef_list.append(corrcoef)
        corrcoef_list = np.array(corrcoef_list)
        self.logger.info('scale method %d, before normalization: %s' %
                         (self.scale_method, str(corrcoef_list)))

        if self.scale_method <= 4:
            corrcoef_list = corrcoef_list**self.scale_method / sum(
                corrcoef_list**self.scale_method)
        elif self.scale_method == 6:
            corrcoef_list = corrcoef_list**2 / sum(corrcoef_list**2)
        elif self.scale_method == 7:
            ref = corrcoef_list**2
            follow_list = (corrcoef_list != max(corrcoef_list)) * ref
            corrcoef_list = follow_list / sum(follow_list) * 0.5 + (
                corrcoef_list == max(corrcoef_list)) * 0.5

        self.logger.info('after normalization: %s' % str(corrcoef_list))

        if sum(np.isnan(corrcoef_list)) == len(corrcoef_list):
            corrcoef_list = list(curr_list)
            self.logger.info('escape nan, keep current weights: %s' %
                             str(corrcoef_list))

        assert len(cur_confidence) == len(corrcoef_list)
        self.logger.info('cur rho %.3f; conf vector/update vector: %s/%s' %
                         (self.rho, str(cur_confidence), str(corrcoef_list)))

        updated_weights = list()
        for i, r in enumerate(r_list):
            if self.scale_method == 7:
                self.weighted_surrogate.surrogate_weight[r] = corrcoef_list[i]
            else:
                self.weighted_surrogate.surrogate_weight[
                    r] = corrcoef_list[i] * (1 - rho) + rho * cur_confidence[r]
            updated_weights.append(self.weighted_surrogate.surrogate_weight[r])
            self.logger.info('update surrogate weight:%d-%.4f' %
                             (r, self.weighted_surrogate.surrogate_weight[r]))
        self.hist_weights.append(updated_weights)
        np.save('data/tmp_weights_%s.npy' % self.method_name,
                np.asarray(self.hist_weights))

    def get_incumbent(self, num_inc=1):
        assert (len(self.incumbent_obj) == len(self.incumbent_configs))
        indices = np.argsort(self.incumbent_obj)
        return [self.incumbent_configs[i] for i in indices[0:num_inc]], \
               [self.incumbent_obj[i] for i in indices[0: num_inc]]

    def get_weights(self):
        return self.hist_weights
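update_weight_vector above turns each low-fidelity surrogate's correlation with the full-fidelity targets into a confidence score, normalizes it, then blends it with the previous weight via rho. A toy numpy version of that update (the prediction vectors are invented):

import numpy as np

test_y = np.array([0.1, 0.4, 0.35, 0.9])   # normalized full-fidelity losses
preds = [np.array([0.2, 0.3, 0.5, 0.8]),   # hypothetical rung-1 predictions
         np.array([0.9, 0.2, 0.4, 0.1])]   # hypothetical rung-3 predictions
corr = np.array([max(0., np.corrcoef(test_y, p)[0, 1]) for p in preds])
corr = corr / corr.sum()                   # scale_method == 1 normalization
rho, old_weights = 0.8, np.array([0.5, 0.5])
new_weights = corr * (1 - rho) + rho * old_weights
print(new_weights)                         # [0.6 0.4]; still sums to 1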
Example #8
class SMAC(BaseFacade):
    def __init__(self,
                 config_space,
                 objective_func,
                 R,
                 num_iter=10,
                 n_workers=1,
                 eta=3,
                 random_state=1,
                 method_id="Default"):
        BaseFacade.__init__(self,
                            objective_func,
                            n_workers=n_workers,
                            method_name=method_id)
        self.config_space = config_space
        self.seed = random_state
        self.config_space.seed(self.seed)
        self.R = R
        self.num_iter = num_iter
        self.eta = eta
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(R))
        self.inner_iteration_n = (self.s_max + 1) * (self.s_max + 1)

        types, bounds = get_types(config_space)
        self.num_config = len(bounds)
        self.surrogate = RandomForestWithInstances(types=types, bounds=bounds)
        self.acquisition_func = EI(model=self.surrogate)
        # TODO: add SMAC's optimization algorithm.
        self.acq_optimizer = RandomSampling(self.acquisition_func,
                                            config_space,
                                            n_samples=max(
                                                500, 50 * self.num_config))

        self.incumbent_configs = []
        self.incumbent_obj = []
        self.p = 0.3

    def iterate(self):
        n_loop = int(ceil(1.0 * self.inner_iteration_n / self.num_workers))
        for _ in range(n_loop):
            T = self.choose_next(self.num_workers)
            extra_info = None

            ret_val, early_stops = self.run_in_parallel(T, self.R, extra_info)
            val_losses = [item['loss'] for item in ret_val]
            self.incumbent_configs.extend(T)
            self.incumbent_obj.extend(val_losses)
            self.add_stage_history(self.stage_id, self.global_incumbent)
            self.stage_id += 1
            self.remove_immediate_model()

    @BaseFacade.process_manage
    def run(self):
        try:
            for iter_id in range(self.num_iter):
                self.logger.info('-' * 50)
                self.logger.info("SMAC algorithm: %d/%d iteration starts" %
                                 (iter_id, self.num_iter))
                start_time = time.time()
                self.iterate()
                time_elapsed = (time.time() - start_time) / 60
                self.logger.info("iteration took %.2f min." % time_elapsed)
                self.save_intemediate_statistics()
        except Exception as e:
            self.logger.error(str(e))
            # Clear the intermediate results.
            self.remove_immediate_model()

    def choose_next(self, num_config):
        if len(self.incumbent_obj) < 3:
            return sample_configurations(self.config_space, num_config)

        self.logger.info('BO Training - X (last 5): %s' %
                         str(self.incumbent_configs[-5:]))
        self.logger.info('BO Training - Y (last 5): %s' %
                         str(self.incumbent_obj[-5:]))
        self.surrogate.train(
            convert_configurations_to_array(self.incumbent_configs),
            np.array(self.incumbent_obj, dtype=np.float64))

        conf_cnt = 0
        total_cnt = 0
        _next_configs = []
        while conf_cnt < num_config and total_cnt < 5 * num_config:
            incumbent = dict()
            best_index = np.argmin(self.incumbent_obj)
            incumbent['obj'] = self.incumbent_obj[best_index]
            incumbent['config'] = self.incumbent_configs[best_index]

            self.acquisition_func.update(model=self.surrogate, eta=incumbent)
            rand_config = self.acq_optimizer.maximize(batch_size=1)[0]
            if rand_config not in _next_configs:
                _next_configs.append(rand_config)
                conf_cnt += 1
            total_cnt += 1
        if conf_cnt < num_config:
            _next_configs = expand_configurations(_next_configs,
                                                  self.config_space,
                                                  num_config)

        next_configs = []

        # Epsilon greedy
        for config in _next_configs:
            if random.random() < self.p:
                next_configs.append(
                    sample_configurations(self.config_space, 1)[0])
            else:
                next_configs.append(config)

        return next_configs

    def get_incumbent(self, num_inc=1):
        assert (len(self.incumbent_obj) == len(self.incumbent_configs))
        indices = np.argsort(self.incumbent_obj)
        configs = [self.incumbent_configs[i] for i in indices[0:num_inc]]
        targets = [self.incumbent_obj[i] for i in indices[0:num_inc]]
        return configs, targets
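get_incumbent above is a plain argsort over the observed objectives. A standalone check with toy values (the string labels stand in for Configuration objects):

import numpy as np

incumbent_obj = [0.31, 0.12, 0.40, 0.27]
incumbent_configs = ['c0', 'c1', 'c2', 'c3']
indices = np.argsort(incumbent_obj)
print([incumbent_configs[i] for i in indices[:2]],
      [incumbent_obj[i] for i in indices[:2]])   # ['c1', 'c3'] [0.12, 0.27]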
Example #9
    def __init__(self,
                 config_space: ConfigurationSpace,
                 objective_func,
                 R,
                 num_iter=10000,
                 eta=3,
                 n_workers=1,
                 random_state=1,
                 init_weight=None,
                 update_enable=True,
                 weight_method='rank_loss_p_norm',
                 fusion_method='gpoe',
                 power_num=2,
                 method_id='Default'):
        BaseFacade.__init__(self,
                            objective_func,
                            n_workers=n_workers,
                            method_name=method_id)
        self.config_space = config_space
        self.R = R
        self.eta = eta
        self.seed = random_state
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(self.R))
        self.B = (self.s_max + 1) * self.R
        self.num_iter = num_iter
        self.update_enable = update_enable
        self.fusion_method = fusion_method
        # Parameter for weight method `rank_loss_p_norm`.
        self.power_num = power_num
        # Specify the weight learning method.
        self.weight_method = weight_method
        self.config_space.seed(self.seed)
        self.weight_update_id = 0
        self.weight_changed_cnt = 0

        if init_weight is None:
            init_weight = [0.]
            init_weight.extend([1. / self.s_max] * self.s_max)
        assert len(init_weight) == (self.s_max + 1)
        if self.weight_method == 'equal_weight':
            assert self.update_enable is False
        self.logger.info('Weight method & flag: %s-%s' %
                         (self.weight_method, str(self.update_enable)))
        self.logger.info("Initial weight is: %s" %
                         init_weight[:self.s_max + 1])
        types, bounds = get_types(config_space)
        self.num_config = len(bounds)

        self.weighted_surrogate = WeightedRandomForestCluster(
            types, bounds, self.s_max, self.eta, init_weight,
            self.fusion_method)
        self.acquisition_function = EI(model=self.weighted_surrogate)

        self.incumbent_configs = []
        self.incumbent_perfs = []

        self.iterate_id = 0
        self.iterate_r = []
        self.hist_weights = list()

        # Saving evaluation statistics in Hyperband.
        self.target_x = dict()
        self.target_y = dict()
        for item in np.logspace(0, self.s_max, self.s_max + 1, base=self.eta):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = []
            self.target_y[r] = []

        # BO optimizer settings.
        self.configs = list()
        self.history_container = HistoryContainer('mfse-container')
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        rng = np.random.RandomState(seed=random_state)
        self.acq_optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=rng,
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations)
        self._random_search = RandomSearch(self.acquisition_function,
                                           self.config_space,
                                           rng=rng)
        self.random_configuration_chooser = ChooserProb(prob=0.2, rng=rng)
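ChooserProb(prob=0.2, ...) above interleaves random configurations among the BO suggestions at a fixed rate, BOHB-style (see choose_next_batch in Example #10). A minimal stand-in that mimics the effect without depending on SMAC:

import numpy as np

rng = np.random.RandomState(1)
prob = 0.2
picks = ['random' if rng.rand() < prob else 'acquisition' for _ in range(50)]
print(picks.count('random'), 'of', len(picks), 'candidates drawn at random')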
Example #10
class MFSE(BaseFacade):
    def __init__(self,
                 config_space: ConfigurationSpace,
                 objective_func,
                 R,
                 num_iter=10000,
                 eta=3,
                 n_workers=1,
                 random_state=1,
                 init_weight=None,
                 update_enable=True,
                 weight_method='rank_loss_p_norm',
                 fusion_method='gpoe',
                 power_num=2,
                 method_id='Default'):
        BaseFacade.__init__(self,
                            objective_func,
                            n_workers=n_workers,
                            method_name=method_id)
        self.config_space = config_space
        self.R = R
        self.eta = eta
        self.seed = random_state
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(self.R))
        self.B = (self.s_max + 1) * self.R
        self.num_iter = num_iter
        self.update_enable = update_enable
        self.fusion_method = fusion_method
        # Parameter for weight method `rank_loss_p_norm`.
        self.power_num = power_num
        # Specify the weight learning method.
        self.weight_method = weight_method
        self.config_space.seed(self.seed)
        self.weight_update_id = 0
        self.weight_changed_cnt = 0

        if init_weight is None:
            init_weight = [0.]
            init_weight.extend([1. / self.s_max] * self.s_max)
        assert len(init_weight) == (self.s_max + 1)
        if self.weight_method == 'equal_weight':
            assert self.update_enable is False
        self.logger.info('Weight method & flag: %s-%s' %
                         (self.weight_method, str(self.update_enable)))
        self.logger.info("Initial weight is: %s" %
                         init_weight[:self.s_max + 1])
        types, bounds = get_types(config_space)
        self.num_config = len(bounds)

        self.weighted_surrogate = WeightedRandomForestCluster(
            types, bounds, self.s_max, self.eta, init_weight,
            self.fusion_method)
        self.acquisition_function = EI(model=self.weighted_surrogate)

        self.incumbent_configs = []
        self.incumbent_perfs = []

        self.iterate_id = 0
        self.iterate_r = []
        self.hist_weights = list()

        # Saving evaluation statistics in Hyperband.
        self.target_x = dict()
        self.target_y = dict()
        for item in np.logspace(0, self.s_max, self.s_max + 1, base=self.eta):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = []
            self.target_y[r] = []

        # BO optimizer settings.
        self.configs = list()
        self.history_container = HistoryContainer('mfse-container')
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        rng = np.random.RandomState(seed=random_state)
        self.acq_optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=rng,
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations)
        self._random_search = RandomSearch(self.acquisition_function,
                                           self.config_space,
                                           rng=rng)
        self.random_configuration_chooser = ChooserProb(prob=0.2, rng=rng)

    def iterate(self, skip_last=0):

        for s in reversed(range(self.s_max + 1)):

            if self.update_enable and self.weight_update_id > self.s_max:
                self.update_weight()
            self.weight_update_id += 1

            # Set initial number of configurations
            n = int(ceil(self.B / self.R / (s + 1) * self.eta**s))
            # initial number of iterations per config
            r = int(self.R * self.eta**(-s))

            # Choose a batch of configurations in different mechanisms.
            start_time = time.time()
            T = self.choose_next_batch(n)
            time_elapsed = time.time() - start_time
            self.logger.info(
                "[%s] Choosing next configurations took %.2f sec." %
                (self.method_name, time_elapsed))

            extra_info = None
            last_run_num = None

            for i in range((s + 1) - int(skip_last)):  # changed from s + 1

                # Run each of the n configs for <iterations>
                # and keep best (n_configs / eta) configurations

                n_configs = n * self.eta**(-i)
                n_iterations = r * self.eta**(i)

                n_iter = n_iterations
                if last_run_num is not None and not self.restart_needed:
                    n_iter -= last_run_num
                last_run_num = n_iterations

                self.logger.info(
                    "MFSE: %d configurations x %d iterations each" %
                    (int(n_configs), int(n_iterations)))

                ret_val, early_stops = self.run_in_parallel(
                    T, n_iter, extra_info)
                val_losses = [item['loss'] for item in ret_val]
                ref_list = [item['ref_id'] for item in ret_val]

                self.target_x[int(n_iterations)].extend(T)
                self.target_y[int(n_iterations)].extend(val_losses)

                if int(n_iterations) == self.R:
                    self.incumbent_configs.extend(T)
                    self.incumbent_perfs.extend(val_losses)
                    # Update history container.
                    for _config, _perf in zip(T, val_losses):
                        self.history_container.add(_config, _perf)

                # Select a number of best configurations for the next loop.
                # Filter out early stops, if any.
                indices = np.argsort(val_losses)
                if len(T) == sum(early_stops):
                    break
                if len(T) >= self.eta:
                    T = [T[i] for i in indices if not early_stops[i]]
                    extra_info = [
                        ref_list[i] for i in indices if not early_stops[i]
                    ]
                    reduced_num = int(n_configs / self.eta)
                    T = T[0:reduced_num]
                    extra_info = extra_info[0:reduced_num]
                else:
                    T = [T[indices[0]]]
                    extra_info = [ref_list[indices[0]]]
                incumbent_loss = val_losses[indices[0]]
                self.add_stage_history(
                    self.stage_id, min(self.global_incumbent, incumbent_loss))
                self.stage_id += 1
            self.remove_immediate_model()

            for item in self.iterate_r[self.iterate_r.index(r):]:
                # Objective value normalization: standard (z-score) normalization.
                normalized_y = std_normalization(self.target_y[item])
                self.weighted_surrogate.train(convert_configurations_to_array(
                    self.target_x[item]),
                                              np.array(normalized_y,
                                                       dtype=np.float64),
                                              r=item)

    @BaseFacade.process_manage
    def run(self):
        try:
            for iter_id in range(1, 1 + self.num_iter):
                self.logger.info('-' * 50)
                self.logger.info("MFSE algorithm: %d/%d iteration starts" %
                                 (iter_id, self.num_iter))
                start_time = time.time()
                self.iterate()
                time_elapsed = (time.time() - start_time) / 60
                self.logger.info("%d/%d-Iteration took %.2f min." %
                                 (iter_id, self.num_iter, time_elapsed))
                self.iterate_id += 1
                self.save_intemediate_statistics()
        except Exception as e:
            self.logger.error(str(e))
            # Clear the intermediate results.
            self.remove_immediate_model()

    def get_bo_candidates(self, num_configs):
        incumbent = dict()
        incumbent_value = np.min(
            std_normalization(self.target_y[self.iterate_r[-1]]))
        incumbent['config'] = self.history_container.get_incumbents()[0][1]
        incumbent['obj'] = incumbent_value
        self.logger.info('Current incumbent: %s' % str(incumbent))
        # incumbent_value = self.history_container.get_incumbents()[0][1]
        # Update surrogate model in acquisition function.
        self.acquisition_function.update(model=self.weighted_surrogate,
                                         eta=incumbent,
                                         num_data=len(
                                             self.history_container.data))

        challengers = self.acq_optimizer.maximize(
            runhistory=self.history_container,
            num_points=5000,
            random_configuration_chooser=self.random_configuration_chooser)
        return challengers.challengers[:num_configs]

    def choose_next_batch(self, num_config):
        if len(self.target_y[self.iterate_r[-1]]) == 0:
            configs = [self.config_space.sample_configuration()]
            configs.extend(
                sample_configurations(self.config_space, num_config - 1))
            self.configs.extend(configs)
            return configs

        config_candidates = list()
        acq_configs = self.get_bo_candidates(num_configs=2 * num_config)
        acq_idx = 0
        for idx in range(1, 1 + 2 * num_config):
            # Like BOHB, sample a fixed percentage of random configurations.
            if self.random_configuration_chooser.check(idx):
                _config = self.config_space.sample_configuration()
            else:
                _config = acq_configs[acq_idx]
                acq_idx += 1
            if _config not in config_candidates:
                config_candidates.append(_config)
            if len(config_candidates) >= num_config:
                break

        if len(config_candidates) < num_config:
            config_candidates = expand_configurations(config_candidates,
                                                      self.config_space,
                                                      num_config)

        _config_candidates = []
        for config in config_candidates:
            if config not in self.configs:  # Check if evaluated
                _config_candidates.append(config)
        self.configs.extend(_config_candidates)
        return _config_candidates

    @staticmethod
    def calculate_ranking_loss(y_pred, y_true):
        length = len(y_pred)
        y_pred = np.reshape(y_pred, -1)
        y_pred1 = np.tile(y_pred, (length, 1))
        y_pred2 = np.transpose(y_pred1)
        diff = y_pred1 - y_pred2
        y_true = np.reshape(y_true, -1)
        y_true1 = np.tile(y_true, (length, 1))
        y_true2 = np.transpose(y_true1)
        y_mask = (y_true1 - y_true2 > 0) + 0
        loss = np.sum(np.log(1 + np.exp(-diff)) * y_mask) / length
        return loss

    @staticmethod
    def calculate_preserving_order_num(y_pred, y_true):
        array_size = len(y_pred)
        assert len(y_true) == array_size

        total_pair_num, order_preserving_num = 0, 0
        for idx in range(array_size):
            for inner_idx in range(idx + 1, array_size):
                if bool(y_true[idx] > y_true[inner_idx]) == bool(
                        y_pred[idx] > y_pred[inner_idx]):
                    order_preserving_num += 1
                total_pair_num += 1
        return order_preserving_num, total_pair_num
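
    # A small worked check of the two helpers above (values assumed purely for
    # illustration): with y_true = [0.1, 0.3, 0.2] and y_pred = [0.0, 0.5, 0.1],
    # all three pairs keep their true ordering, so
    # calculate_preserving_order_num(y_pred, y_true) returns (3, 3), and
    # calculate_ranking_loss is small because every preserved margin is positive.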

    def update_weight(self):
        max_r = self.iterate_r[-1]
        incumbent_configs = self.target_x[max_r]
        test_x = convert_configurations_to_array(incumbent_configs)
        test_y = np.array(self.target_y[max_r], dtype=np.float64)

        r_list = self.weighted_surrogate.surrogate_r
        K = len(r_list)

        if len(test_y) >= 3:
            # Enough observations on the full fidelity: recompute the weights.
            if self.weight_method in [
                    'rank_loss_softmax', 'rank_loss_single', 'rank_loss_p_norm'
            ]:
                preserving_order_p = list()
                preserving_order_nums = list()
                for i, r in enumerate(r_list):
                    fold_num = 5
                    if i != K - 1:
                        mean, var = self.weighted_surrogate.surrogate_container[
                            r].predict(test_x)
                        tmp_y = np.reshape(mean, -1)
                        preorder_num, pair_num = MFSE.calculate_preserving_order_num(
                            tmp_y, test_y)
                        preserving_order_p.append(preorder_num / pair_num)
                        preserving_order_nums.append(preorder_num)
                    else:
                        if len(test_y) < 2 * fold_num:
                            # Too few points for CV: score the full-fidelity
                            # surrogate with zeros on both lists so the weight
                            # vectors below keep length K.
                            preserving_order_p.append(0)
                            preserving_order_nums.append(0)
                        else:
                            # 5-fold cross validation.
                            kfold = KFold(n_splits=fold_num)
                            cv_pred = np.zeros(len(test_y))
                            for train_idx, valid_idx in kfold.split(test_x):
                                train_configs, train_y = test_x[
                                    train_idx], test_y[train_idx]
                                valid_configs, valid_y = test_x[
                                    valid_idx], test_y[valid_idx]
                                types, bounds = get_types(self.config_space)
                                _surrogate = RandomForestWithInstances(
                                    types=types, bounds=bounds)
                                _surrogate.train(train_configs, train_y)
                                pred, _ = _surrogate.predict(valid_configs)
                                cv_pred[valid_idx] = pred.reshape(-1)
                            preorder_num, pair_num = MFSE.calculate_preserving_order_num(
                                cv_pred, test_y)
                            preserving_order_p.append(preorder_num / pair_num)
                            preserving_order_nums.append(preorder_num)

                if self.weight_method == 'rank_loss_softmax':
                    order_weight = np.array(np.sqrt(preserving_order_nums))
                    trans_order_weight = order_weight - np.max(order_weight)
                    # Softmax mapping.
                    new_weights = np.exp(trans_order_weight) / sum(
                        np.exp(trans_order_weight))
                elif self.weight_method == 'rank_loss_p_norm':
                    trans_order_weight = np.array(preserving_order_p)
                    power_sum = np.sum(
                        np.power(trans_order_weight, self.power_num))
                    new_weights = np.power(trans_order_weight,
                                           self.power_num) / power_sum
                else:
                    _idx = np.argmax(np.array(preserving_order_nums))
                    new_weights = [0.] * K
                    new_weights[_idx] = 1.
            elif self.weight_method == 'rank_loss_prob':
                # For basic surrogate i=1:K-1.
                mean_list, var_list = list(), list()
                for i, r in enumerate(r_list[:-1]):
                    mean, var = self.weighted_surrogate.surrogate_container[
                        r].predict(test_x)
                    mean_list.append(np.reshape(mean, -1))
                    var_list.append(np.reshape(var, -1))
                sample_num = 100
                min_probability_array = [0] * K
                for _ in range(sample_num):
                    order_preserving_nums = list()

                    # For basic surrogates i=1:K-1, draw one sample from each
                    # predictive distribution (np.random.normal expects a
                    # standard deviation, not a variance).
                    for idx in range(K - 1):
                        sampled_y = np.random.normal(mean_list[idx],
                                                     np.sqrt(var_list[idx]))
                        _num, _ = MFSE.calculate_preserving_order_num(
                            sampled_y, test_y)
                        order_preserving_nums.append(_num)

                    fold_num = 5
                    # For the full-fidelity surrogate i=K, use
                    # cross-validated predictions.
                    if len(test_y) < 2 * fold_num:
                        order_preserving_nums.append(0)
                    else:
                        # 5-fold cross validation.
                        kfold = KFold(n_splits=fold_num)
                        cv_pred = np.zeros(len(test_y))
                        for train_idx, valid_idx in kfold.split(test_x):
                            train_configs, train_y = test_x[train_idx], test_y[
                                train_idx]
                            valid_configs, valid_y = test_x[valid_idx], test_y[
                                valid_idx]
                            types, bounds = get_types(self.config_space)
                            _surrogate = RandomForestWithInstances(
                                types=types, bounds=bounds)
                            _surrogate.train(train_configs, train_y)
                            _pred, _var = _surrogate.predict(valid_configs)
                            sampled_pred = np.random.normal(
                                _pred.reshape(-1), np.sqrt(_var.reshape(-1)))
                            cv_pred[valid_idx] = sampled_pred
                        _num, _ = MFSE.calculate_preserving_order_num(
                            cv_pred, test_y)
                        order_preserving_nums.append(_num)
                    # The surrogate preserving the most orderings in this
                    # sample wins the round.
                    max_id = np.argmax(order_preserving_nums)
                    min_probability_array[max_id] += 1
                # Each weight is the empirical probability of winning.
                new_weights = np.array(min_probability_array) / sample_num

            elif self.weight_method == 'opt_based':
                mean_list, var_list = list(), list()
                for i, r in enumerate(r_list):
                    if i != K - 1:
                        mean, var = self.weighted_surrogate.surrogate_container[
                            r].predict(test_x)
                        tmp_y = np.reshape(mean, -1)
                        tmp_var = np.reshape(var, -1)
                        mean_list.append(tmp_y)
                        var_list.append(tmp_var)
                    else:
                        if len(test_y) < 8:
                            # Too few points for CV: fall back to an
                            # uninformative zero prediction for the
                            # full-fidelity surrogate.
                            mean_list.append(np.zeros(len(test_y)))
                            var_list.append(np.zeros(len(test_y)))
                        else:
                            # 5-fold cross validation.
                            kfold = KFold(n_splits=5)
                            cv_pred = np.zeros(len(test_y))
                            cv_var = np.zeros(len(test_y))
                            for train_idx, valid_idx in kfold.split(test_x):
                                train_configs, train_y = test_x[
                                    train_idx], test_y[train_idx]
                                valid_configs, valid_y = test_x[
                                    valid_idx], test_y[valid_idx]
                                types, bounds = get_types(self.config_space)
                                _surrogate = RandomForestWithInstances(
                                    types=types, bounds=bounds)
                                _surrogate.train(train_configs, train_y)
                                pred, var = _surrogate.predict(valid_configs)
                                cv_pred[valid_idx] = pred.reshape(-1)
                                cv_var[valid_idx] = var.reshape(-1)
                            mean_list.append(cv_pred)
                            var_list.append(cv_var)
                means = np.array(mean_list)
                variances = np.array(var_list) + 1e-8

                def min_func(x):
                    # Precision-weighted (inverse-variance) ensemble of the
                    # base surrogates under the weight vector x.
                    x = np.reshape(np.array(x), (1, len(x)))
                    ensemble_vars = 1 / (x @ (1 / variances))
                    ensemble_means = x @ (means / variances) * ensemble_vars
                    ensemble_means = np.reshape(ensemble_means, -1)
                    self.logger.info("Weight vector: " + str(x))
                    return MFSE.calculate_ranking_loss(ensemble_means, test_y)

                constraints = [{
                    'type': 'eq',
                    'fun': lambda x: np.sum(x) - 1
                }, {
                    'type': 'ineq',
                    'fun': lambda x: x - 0
                }, {
                    'type': 'ineq',
                    'fun': lambda x: 1 - x
                }]
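                # Together these constraints restrict x to the probability
                # simplex: non-negative weights that sum to one.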
                # Start from uniform weights so the equality constraint is
                # satisfied at the initial point.
                res = minimize(min_func,
                               np.array([1. / K] * K),
                               constraints=constraints)
                new_weights = res.x
            else:
                raise ValueError('Invalid weight method: %s!' %
                                 self.weight_method)
        else:
            # Too few observations: keep the previous weights unchanged.
            new_weights = [
                self.weighted_surrogate.surrogate_weight[r] for r in r_list
            ]

        self.logger.info(
            '[%s] weight update %d: %s' %
            (self.weight_method, self.weight_changed_cnt, str(new_weights)))

        # Assign the weight to each basic surrogate.
        for i, r in enumerate(r_list):
            self.weighted_surrogate.surrogate_weight[r] = new_weights[i]
        self.weight_changed_cnt += 1
        # Save the weight data.
        self.hist_weights.append(new_weights)
        np.save(
            'data/%s_weights_%s.npy' % (self.method_name, self.weight_method),
            np.asarray(self.hist_weights))

    def get_incumbent(self, num_inc=1):
        assert (len(self.incumbent_perfs) == len(self.incumbent_configs))
        indices = np.argsort(self.incumbent_perfs)
        return [self.incumbent_configs[i] for i in indices[0:num_inc]], \
               [self.incumbent_perfs[i] for i in indices[0: num_inc]]

    def get_weights(self):
        return self.hist_weights
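
# A minimal standalone sketch of the rank_loss_softmax weighting rule in
# update_weight above, with made-up order-preserving counts; the function
# name and values here are illustrative only.
import numpy as np

def softmax_weights(preserving_order_nums):
    # Square-root damping of the raw counts, then a max-shifted
    # (numerically stable) softmax, mirroring the branch in update_weight.
    order_weight = np.sqrt(np.asarray(preserving_order_nums, dtype=np.float64))
    trans = order_weight - np.max(order_weight)
    return np.exp(trans) / np.sum(np.exp(trans))

# Assumed counts for K = 3 surrogates: the surrogate that preserves the
# most pairwise orderings receives the largest weight.
print(softmax_weights([10, 40, 90]))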
Example #11
class SMAC_ES(BaseFacade):

    def __init__(self, config_space, objective_func, R,
                 num_iter=10, n_workers=1, eta=3, es_gap=9, rho=0.7,
                 random_state=1, method_id="Default"):
        BaseFacade.__init__(self, objective_func, n_workers=n_workers, need_lc=True, method_name=method_id)
        self.seed = random_state
        self.config_space = config_space
        self.config_space.seed(self.seed)
        self.R = R
        self.num_iter = num_iter
        self.eta = eta
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(R))
        self.inner_iteration_n = (self.s_max + 1) * (self.s_max + 1)

        types, bounds = get_types(config_space)
        self.num_config = len(bounds)
        self.surrogate = RandomForestWithInstances(types=types, bounds=bounds)
        self.acquisition_func = EI(model=self.surrogate)
        # TODO: add SMAC's optimization algorithm.
        self.acq_optimizer = RandomSampling(self.acquisition_func, config_space,
                                            n_samples=max(500, 50 * self.num_config))

        self.incumbent_configs = []
        self.incumbent_obj = []

        self.lcnet_model = LC_ES()
        self.early_stop_gap = es_gap
        self.es_rho = rho
        self.lc_training_x = None
        self.lc_training_y = None

    def iterate(self):
        for _ in range(self.inner_iteration_n):
            T = self.choose_next(self.num_workers)

            extra_info = None
            lc_info = dict()
            lc_conf_mapping = dict()

            # assume no same configuration in the same batch: T
            for item in T:
                conf_id = get_configuration_id(item.get_dictionary())
                sha = hashlib.sha1(conf_id.encode('utf8'))
                conf_id = sha.hexdigest()
                lc_conf_mapping[conf_id] = item

            total_iter_num = self.R // self.early_stop_gap
            for iter_num in range(1, 1 + total_iter_num):
                self.logger.info('start iteration gap %d' % iter_num)
                ret_val, early_stops = self.run_in_parallel(T, self.early_stop_gap, extra_info)
                val_losses = [item['loss'] for item in ret_val]
                ref_list = [item['ref_id'] for item in ret_val]
                for item in ret_val:
                    conf_id = item['ref_id']
                    if not self.restart_needed:
                        if conf_id not in lc_info:
                            lc_info[conf_id] = []
                        lc_info[conf_id].extend(item['lc_info'])
                    else:
                        lc_info[conf_id] = item['lc_info']

                if iter_num == total_iter_num:
                    self.incumbent_configs.extend(T)
                    self.incumbent_obj.extend(val_losses)

                T = [config for i, config in enumerate(T) if not early_stops[i]]
                extra_info = [ref for i, ref in enumerate(ref_list) if not early_stops[i]]
                if len(T) == 0:
                    break

                if len(self.incumbent_obj) >= 2 * self.num_config and iter_num != total_iter_num:
                    # learning curve based early stop strategy.
                    ref_list = extra_info
                    early_stops = self.stop_early(T)
                    T = [config for i, config in enumerate(T) if not early_stops[i]]
                    extra_info = [ref for i, ref in enumerate(ref_list) if not early_stops[i]]
                if len(T) == 0:
                    break

            # Keep the learning-curve data gathered in this round.
            for item, config in lc_conf_mapping.items():
                lc_data = lc_info.get(item, [])
                if len(lc_data) > 0:
                    n_epochs = len(lc_data)
                    # The normalized epoch index in (0, 1] is appended as an
                    # extra input feature for the learning-curve model.
                    t_idx = np.arange(1, n_epochs + 1) / n_epochs
                    conf_data = convert_configurations_to_array([config])
                    x = np.repeat(conf_data, t_idx.shape[0], axis=0)
                    x = np.concatenate((x, t_idx[:, None]), axis=1)
                    y = np.array(lc_data)
                    if self.lc_training_x is None:
                        self.lc_training_x, self.lc_training_y = x, y
                    else:
                        self.lc_training_x = np.concatenate((self.lc_training_x, x), 0)
                        self.lc_training_y = np.concatenate((self.lc_training_y, y), 0)
            if self.lc_training_x is not None:
                self.logger.info('training data shape: %s' % str(self.lc_training_x.shape))
            if len(self.incumbent_obj) >= 2 * self.num_config and len(self.incumbent_obj) % self.num_config == 0:
                self.lcnet_model.train(self.lc_training_x, self.lc_training_y)

            self.add_stage_history(self.stage_id, self.global_incumbent)
            self.stage_id += 1
            self.remove_immediate_model()

    @BaseFacade.process_manage
    def run(self):
        try:
            for _iter in range(self.num_iter):
                self.logger.info('-' * 50)
                self.logger.info("SMAC with ES algorithm: %d/%d iteration starts" % (_iter + 1, self.num_iter))
                start_time = time.time()
                self.iterate()
                time_elapsed = (time.time() - start_time) / 60
                self.logger.info("iteration took %.2f min." % time_elapsed)
                self.save_intemediate_statistics()
        except Exception as e:
            self.logger.error(str(e))
            # Clean up the intermediate results.
            self.remove_immediate_model()

    def stop_early(self, T):
        configs_data = convert_configurations_to_array(T)
        # Query the learning-curve model at normalized time t = 1.0, i.e.,
        # predict the final performance of each configuration.
        x_test = np.concatenate(
            (configs_data, np.ones((configs_data.shape[0], 1))), axis=1)

        m, v = self.lcnet_model.predict(x_test)
        best_accuracy = 1 - self.global_incumbent
        s = np.sqrt(v)
        less_p = norm.cdf((best_accuracy - m) / s)
        self.logger.info('early stop prob: %s' % str(less_p))
        early_stop_flag = (less_p >= self.es_rho)

        self.logger.info('early stop vector: %s' % str(early_stop_flag))

        for i, flag in enumerate(early_stop_flag):
            if flag:
                self.incumbent_configs.append(T[i])
                # The LC model predicts accuracy, while incumbent_obj stores
                # losses, so convert before recording.
                self.incumbent_obj.append(1 - m[i])
        return early_stop_flag

    def choose_next(self, num_config):
        if len(self.incumbent_obj) < 2 * self.num_config:
            return sample_configurations(self.config_space, num_config)

        self.logger.info('Training features (last 5 configs): %s' % str(self.incumbent_configs[-5:]))
        self.logger.info('Training targets: %s' % str(self.incumbent_obj))

        self.surrogate.train(convert_configurations_to_array(self.incumbent_configs),
                             np.array(self.incumbent_obj, dtype=np.float64))

        conf_cnt = 0
        total_cnt = 0
        next_configs = []
        # The incumbent does not change within the loop below, so update the
        # acquisition function once up front.
        incumbent = dict()
        best_index = np.argmin(self.incumbent_obj)
        incumbent['obj'] = self.incumbent_obj[best_index]
        incumbent['config'] = self.incumbent_configs[best_index]
        self.acquisition_func.update(model=self.surrogate, eta=incumbent)
        while conf_cnt < num_config and total_cnt < 5 * num_config:
            next_config = self.acq_optimizer.maximize(batch_size=1)[0]
            if next_config not in next_configs:
                next_configs.append(next_config)
                conf_cnt += 1
            total_cnt += 1
        if conf_cnt < num_config:
            next_configs = expand_configurations(next_configs, self.config_space, num_config)
        return next_configs

    def get_incumbent(self, num_inc=1):
        assert (len(self.incumbent_obj) == len(self.incumbent_configs))
        indices = np.argsort(self.incumbent_obj)
        configs = [self.incumbent_configs[i] for i in indices[0:num_inc]]
        targets = [self.incumbent_obj[i] for i in indices[0: num_inc]]
        return configs, targets
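
# A minimal standalone sketch of the Gaussian early-stopping test used in
# SMAC_ES.stop_early, with assumed predictive means/variances; the function
# name and inputs are illustrative only.
import numpy as np
from scipy.stats import norm

def early_stop_mask(pred_mean, pred_var, best_accuracy, rho=0.7):
    # P(final accuracy < current best) under the Gaussian predictive
    # distribution; a run is stopped once this probability exceeds rho.
    less_p = norm.cdf((best_accuracy - pred_mean) / np.sqrt(pred_var))
    return less_p >= rho

# Three assumed running configurations: the first and third look unlikely
# to beat the incumbent accuracy of 0.90 and would be stopped.
print(early_stop_mask(np.array([0.80, 0.91, 0.85]),
                      np.array([0.01, 0.02, 0.001]),
                      best_accuracy=0.90))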
Example #12
class BOHB(BaseFacade):
    """ The implementation of BOHB.
        The paper can be found in https://arxiv.org/abs/1807.01774 .
    """
    def __init__(self,
                 config_space: ConfigurationSpace,
                 objective_func,
                 R,
                 num_iter=10000,
                 eta=3,
                 p=0.3,
                 n_workers=1,
                 random_state=1,
                 method_id='Default'):
        BaseFacade.__init__(self,
                            objective_func,
                            n_workers=n_workers,
                            method_name=method_id)
        self.config_space = config_space
        self.seed = random_state
        self.config_space.seed(self.seed)
        self.p = p
        self.R = R
        self.eta = eta
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(self.R))
        self.B = (self.s_max + 1) * self.R
        self.num_iter = num_iter

        types, bounds = get_types(config_space)
        self.num_config = len(bounds)
        self.surrogate = RandomForestWithInstances(types=types, bounds=bounds)
        self.acquisition_func = EI(model=self.surrogate)
        self.acq_optimizer = RandomSampling(self.acquisition_func,
                                            config_space,
                                            n_samples=max(
                                                500, 50 * self.num_config))

        self.incumbent_configs = []
        self.incumbent_obj = []

    def iterate(self, skip_last=0):

        for s in reversed(range(self.s_max + 1)):
            # Set initial number of configurations
            n = int(ceil(self.B / self.R / (s + 1) * self.eta**s))
            # Set initial number of iterations per config
            r = self.R * self.eta**(-s)

            # Sample n configurations according to BOHB strategy.
            T = self.choose_next(n)
            extra_info = None
            last_run_num = None
            # Iterate over the rungs of this bracket; skip_last drops the
            # final rung(s).
            for i in range((s + 1) - int(skip_last)):
                # Run each of the n configs for <iterations>
                # and keep best (n_configs / eta) configurations.

                n_configs = n * self.eta**(-i)
                n_iterations = r * self.eta**(i)

                n_iter = n_iterations
                if last_run_num is not None and not self.restart_needed:
                    # Resume from the previous rung: run only the additional
                    # iterations.
                    n_iter -= last_run_num
                last_run_num = n_iterations

                self.logger.info(
                    "BOHB: %d configurations x %d iterations each" %
                    (int(n_configs), int(n_iterations)))

                ret_val, early_stops = self.run_in_parallel(
                    T, n_iter, extra_info)
                val_losses = [item['loss'] for item in ret_val]
                ref_list = [item['ref_id'] for item in ret_val]

                if int(n_iterations) == self.R:
                    self.incumbent_configs.extend(T)
                    self.incumbent_obj.extend(val_losses)

                # Select a number of best configurations for the next loop.
                # Filter out early stops, if any.
                indices = np.argsort(val_losses)
                if len(T) == sum(early_stops):
                    break
                if len(T) >= self.eta:
                    T = [T[i] for i in indices if not early_stops[i]]
                    extra_info = [
                        ref_list[i] for i in indices if not early_stops[i]
                    ]
                    reduced_num = int(n_configs / self.eta)
                    T = T[0:reduced_num]
                    extra_info = extra_info[0:reduced_num]
                else:
                    T = [T[indices[0]]]
                    extra_info = [ref_list[indices[0]]]
                incumbent_loss = val_losses[indices[0]]
                self.add_stage_history(
                    self.stage_id, min(self.global_incumbent, incumbent_loss))
                self.stage_id += 1
            self.remove_immediate_model()

    @BaseFacade.process_manage
    def run(self):
        try:
            for _iter in range(self.num_iter):
                self.logger.info('-' * 50)
                self.logger.info("BOHB algorithm: %d/%d iteration starts" %
                                 (_iter + 1, self.num_iter))
                start_time = time.time()
                self.iterate()
                time_elapsed = (time.time() - start_time) / 60
                self.logger.info("Iteration took %.2f min." % time_elapsed)
                self.save_intemediate_statistics()
        except Exception as e:
            self.logger.error(str(e))
            # Clean up the intermediate results.
            self.remove_immediate_model()

    def choose_next(self, num_config):
        if len(self.incumbent_obj) < 3:
            return sample_configurations(self.config_space, num_config)

        self.logger.info('Training features (first 5 configs): %s' %
                         str(self.incumbent_configs[:5]))
        self.logger.info('Training targets: %s' % str(self.incumbent_obj))
        self.surrogate.train(
            convert_configurations_to_array(self.incumbent_configs),
            np.array(self.incumbent_obj, dtype=np.float64))

        config_cnt = 0
        total_sample_cnt = 0
        config_candidates = []
        while config_cnt < num_config and total_sample_cnt < 3 * num_config:
            if random.random() < self.p:
                rand_config = self.config_space.sample_configuration(1)
            else:
                # Use the surrogate and EI to produce a candidate.
                incumbent = dict()
                best_index = np.argmin(self.incumbent_obj)
                incumbent['obj'] = self.incumbent_obj[best_index]
                incumbent['config'] = self.incumbent_configs[best_index]

                self.acquisition_func.update(model=self.surrogate,
                                             eta=incumbent)
                rand_config = self.acq_optimizer.maximize(batch_size=1)[0]
            if rand_config not in config_candidates:
                config_candidates.append(rand_config)
                config_cnt += 1
            total_sample_cnt += 1
        if config_cnt < num_config:
            config_candidates = expand_configurations(config_candidates,
                                                      self.config_space,
                                                      num_config)
        return config_candidates

    def get_incumbent(self, num_inc=1):
        assert (len(self.incumbent_obj) == len(self.incumbent_configs))
        indices = np.argsort(self.incumbent_obj)
        configs = [self.incumbent_configs[i] for i in indices[0:num_inc]]
        targets = [self.incumbent_obj[i] for i in indices[0:num_inc]]
        return configs, targets
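
# A minimal standalone sketch of the bracket arithmetic in BOHB.iterate:
# for each bracket s, n configurations start with r iterations each
# (r is rounded here purely for display). Name and output are illustrative.
from math import ceil, log

def hyperband_schedule(R, eta=3):
    s_max = int(log(R) / log(eta))
    B = (s_max + 1) * R
    schedule = []
    for s in reversed(range(s_max + 1)):
        n = int(ceil(B / R / (s + 1) * eta ** s))
        r = R * eta ** (-s)
        schedule.append((s, n, round(r)))
    return schedule

# With R = 81 and eta = 3 this yields the Hyperband brackets:
# [(4, 81, 1), (3, 34, 3), (2, 15, 9), (1, 8, 27), (0, 5, 81)]
print(hyperband_schedule(81))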