Ejemplo n.º 1
0
    def build_single_surrogate(self, X: np.ndarray, y: np.ndarray, normalize):
        """Train one surrogate over the config space extended by meta-features.

        A fresh ``ConfigurationSpace`` is filled with the hyperparameters,
        conditions and forbidden clauses of ``self.config_space``.  One
        ``meta_feature_<i>`` float hyperparameter in [0, 1] is then appended
        for every column of ``X`` beyond the number of hyperparameters in
        ``self.config_space``.

        Parameters
        ----------
        X
            Training inputs; ``X.shape[1]`` is read to determine how many
            meta-feature columns are present.
        y
            Training targets.  The caller's array is never modified.
        normalize
            ``'standardize'``, ``'scale'`` or ``'none'``; selects which
            normalization helper (if any) is applied to ``y`` before training.

        Returns
        -------
        The model created by ``build_model`` after ``model.train(X, y)``.

        Raises
        ------
        AssertionError
            If ``normalize`` is not one of the three accepted values.
        """
        assert normalize in ['standardize', 'scale', 'none']

        # Construct hyperspace with meta-features.
        config_space = ConfigurationSpace()
        for hp in self.config_space.get_hyperparameters():
            config_space.add_hyperparameter(hp)
        for cond in self.config_space.get_conditions():
            config_space.add_condition(cond)
        for bid in self.config_space.get_forbiddens():
            config_space.add_forbidden_clause(bid)

        _meta_feature_size = X.shape[1] - len(
            self.config_space.get_hyperparameters())
        for _idx in range(_meta_feature_size):
            config_space.add_hyperparameter(
                UniformFloatHyperparameter("meta_feature_%d" % _idx,
                                           0.,
                                           1.,
                                           default_value=0.,
                                           log=False))

        model = build_model(self.surrogate_type, config_space,
                            np.random.RandomState(self.random_seed))

        if normalize != 'none':
            if (y == y[0]).all():
                # All targets are identical: nudge the first one so the
                # targets are no longer constant.  Do it on a float copy so
                # the caller's array is left untouched and the nudge is not
                # truncated away for integer inputs.
                y = np.array(y, dtype=np.float64)
                y[0] += 1e-4
            if normalize == 'standardize':
                y, _, _ = zero_mean_unit_var_normalization(y)
            else:
                y, _, _ = zero_one_normalization(y)

        model.train(X, y)
        return model
Ejemplo n.º 2
0
    def build_source_surrogates(self, normalize):
        """Fit one surrogate per entry of ``self.source_hpo_data``.

        For every source history the (config, performance) pairs are turned
        into arrays, truncated to the first ``self.num_src_hpo_trial`` rows,
        the targets are normalized, and a new model is trained on them.  The
        trained models replace ``self.source_surrogates``; the minimum
        normalized target of each history is appended to ``self.eta_list``.

        Parameters
        ----------
        normalize
            ``'standardize'`` or ``'scale'``.  With ``'scale'`` the output of
            ``zero_one_normalization`` is further mapped through ``2*y - 1``.

        Raises
        ------
        ValueError
            If ``normalize`` is any other value.
        """
        print('start to train base surrogates.')
        t_begin = time.time()
        self.source_surrogates = list()

        for history in self.source_hpo_data:
            print('.', end='')
            surrogate = build_model(self.surrogate_type, self.config_space,
                                    np.random.RandomState(self.random_seed))

            pairs = list(history.items())
            configs = [cfg for cfg, _ in pairs]
            perfs = [perf for _, perf in pairs]

            limit = self.num_src_hpo_trial
            X = convert_configurations_to_array(configs)[:limit]
            y = np.array(perfs, dtype=np.float64)[:limit]

            if normalize not in ('standardize', 'scale'):
                raise ValueError('Invalid parameter in norm.')

            # Constant targets get a small nudge on the first element before
            # they are handed to the normalization helper.
            if (y == y[0]).all():
                y[0] += 1e-4

            if normalize == 'standardize':
                y, _, _ = zero_mean_unit_var_normalization(y)
            else:
                y, _, _ = zero_one_normalization(y)
                y = 2 * y - 1.

            self.eta_list.append(np.min(y))
            surrogate.train(X, y)
            self.source_surrogates.append(surrogate)

        print()
        elapsed = time.time() - t_begin
        print('Building base surrogates took %.3fs.' % elapsed)
Ejemplo n.º 3
0
    def build_single_surrogate(self, X: np.ndarray, y: np.ndarray, normalize):
        """Build and train one surrogate model on ``(X, y)``.

        Parameters
        ----------
        X
            Training inputs, passed unchanged to ``model.train``.
        y
            Training targets.  The caller's array is never modified.
        normalize
            ``'standardize'``, ``'scale'`` or ``'none'``; selects which
            normalization helper (if any) is applied to ``y`` before training.

        Returns
        -------
        The model created by ``build_model`` after ``model.train(X, y)``.

        Raises
        ------
        AssertionError
            If ``normalize`` is not one of the three accepted values.
        """
        assert normalize in ['standardize', 'scale', 'none']
        model = build_model(self.surrogate_type, self.config_space,
                            np.random.RandomState(self.random_seed))

        if normalize != 'none':
            if (y == y[0]).all():
                # All targets are identical: nudge the first one so the
                # targets are no longer constant.  Do it on a float copy so
                # the caller's array is left untouched and the nudge is not
                # truncated away for integer inputs.
                y = np.array(y, dtype=np.float64)
                y[0] += 1e-4
            if normalize == 'standardize':
                y, _, _ = zero_mean_unit_var_normalization(y)
            else:
                y, _, _ = zero_one_normalization(y)

        model.train(X, y)
        return model
Ejemplo n.º 4
0
    def build_classifier(self):
        """Train one binary SVM classifier per entry of ``self.source_hpo_data``.

        For each source history the targets are truncated to the first
        ``self.num_src_hpo_trial`` entries and passed through
        ``zero_mean_unit_var_normalization``.  A configuration is labelled
        ``True`` when its normalized target is below the 30th percentile; if
        that yields a single class, the mean is used as the threshold instead.
        The fitted pipelines (``StandardScaler`` + ``SVC(gamma='auto')``)
        replace ``self.space_classifier``.

        Raises
        ------
        ValueError
            If every normalized target equals the percentile threshold.
        """
        # Imported once per call rather than once per source task.
        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler
        from sklearn.svm import SVC

        # Train the binary classifier.
        print('start to train binary classifier.')
        start_time = time.time()
        self.space_classifier = list()
        for hpo_evaluation_data in self.source_hpo_data:
            print('.', end='')
            _X, _y = list(), list()
            for _config, _config_perf in hpo_evaluation_data.items():
                _X.append(_config)
                _y.append(_config_perf)
            X = convert_configurations_to_array(_X)
            y = np.array(_y, dtype=np.float64)
            X = X[:self.num_src_hpo_trial]
            y = y[:self.num_src_hpo_trial]

            # Targets are always standardized here.  Constant targets get a
            # small nudge on the first element before normalization.
            if (y == y[0]).all():
                y[0] += 1e-4
            y, _, _ = zero_mean_unit_var_normalization(y)
            y = np.asarray(y)

            percentile = np.percentile(y, 30)
            space_label = y < percentile
            if (y == percentile).all():
                raise ValueError('assertion violation: the same eval values!')
            if (space_label[0] == space_label).all():
                # Only one class under the percentile split: fall back to
                # splitting at the mean.
                space_label = y < np.mean(y)
                print('Label treatment triggers!')

            clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
            clf.fit(X, space_label)
            self.space_classifier.append(clf)
        print('Building base classifier took %.3fs.' %
              (time.time() - start_time))
    def prepare_classifier(self, task_ids, percentiles):
        """Train a binary classifier for each selected source task.

        ``self.space_classifier`` is reset to a list of ``None`` with one slot
        per entry of ``self.source_hpo_data``; only the slots listed in
        ``task_ids`` are filled.  For each task the targets are truncated to
        the first ``self.num_src_hpo_trial`` entries and passed through
        ``zero_mean_unit_var_normalization``.  Configurations whose
        normalized target lies below the task's percentile threshold are
        labelled ``True``.

        Parameters
        ----------
        task_ids
            Iterable of indices into ``self.source_hpo_data``.
        percentiles
            Indexable by task id; ``percentiles[task_id]`` is passed as the
            ``q`` argument of ``np.percentile``.

        Raises
        ------
        ValueError
            If all normalized targets equal the threshold, if neither the
            percentile split nor the mean split produces two classes, or if
            ``self.clf_type`` is not one of ``'svm'``, ``'rf'``, ``'knn'``,
            ``'gp'``.
        """
        # Train the binary classifier.
        print('Train binary classifiers.')
        start_time = time.time()
        self.space_classifier = [None] * len(self.source_hpo_data)

        for _task_id in task_ids:
            hpo_evaluation_data = self.source_hpo_data[_task_id]
            percentile_v = percentiles[_task_id]

            print('.', end='')
            _X, _y = list(), list()
            for _config, _config_perf in hpo_evaluation_data.items():
                _X.append(_config)
                _y.append(_config_perf)
            X = convert_configurations_to_array(_X)
            y = np.array(_y, dtype=np.float64)
            X = X[:self.num_src_hpo_trial]
            y = y[:self.num_src_hpo_trial]

            # Targets are always standardized here.  Constant targets get a
            # small nudge on the first element before normalization.
            if (y == y[0]).all():
                y[0] += 1e-4
            y, _, _ = zero_mean_unit_var_normalization(y)
            y = np.asarray(y)

            percentile = np.percentile(y, percentile_v)
            # If the threshold does not exceed the smallest value, no point
            # would be labelled True; move it up to the second-smallest
            # distinct value so that the minimum is.
            unique_ys = np.unique(y)
            if len(unique_ys) >= 2 and percentile <= unique_ys[0]:
                percentile = unique_ys[1]

            space_label = y < percentile
            if (y == percentile).all():
                raise ValueError('Assertion violation: The same eval values!')
            if (space_label[0] == space_label).all():
                # Single class under the percentile split: retry at the mean.
                space_label = y < np.mean(y)
                if (space_label[0] == space_label).all():
                    raise ValueError('Warning: Label treatment triggers!')
                else:
                    print('Warning: Label treatment triggers!')

            if self.clf_type == 'svm':
                clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
            elif self.clf_type == 'rf':
                clf = make_pipeline(
                    StandardScaler(),
                    RandomForestClassifier(n_estimators=50, max_depth=4))
            elif self.clf_type == 'knn':
                clf = make_pipeline(StandardScaler(),
                                    KNeighborsClassifier(n_neighbors=5))
            elif self.clf_type == 'gp':
                clf = make_pipeline(StandardScaler(),
                                    GaussianProcessClassifier())
            else:
                # Previously fell through and failed later with an unbound
                # local `clf`; fail with a clear message instead.
                raise ValueError('Invalid classifier type ~ %s.' %
                                 self.clf_type)

            clf.fit(X, space_label)
            self.space_classifier[_task_id] = clf
        print('Building base classifier took %.3fs.' %
              (time.time() - start_time))
    def choose_next(self, X: np.ndarray, Y: np.ndarray):
        """
        Step 1. sample a batch of random configs.
        Step 2. identify and preserve the configs in the good regions (union)
        Step 3. calculate their acquisition functions and choose the config with the largest value.

        Parameters
        ----------
        X
            Evaluated configurations as an array; ``X.shape[0]`` is used as
            the number of observations so far and ``X`` is passed to
            ``self.model.train``.
        Y
            Observed performances for ``X``; passed to ``self.model.train``.
            The minimum of the (method-dependent) normalized values is used
            as the incumbent ``eta`` of the acquisition function.

        Returns
        -------
        the config to evaluate next.

        Raises
        ------
        ValueError
            If ``self.acq_func`` is not ``'ei'``.
        """

        _config_num = X.shape[0]
        if _config_num < self.init_num:
            # Initial design phase: warm-start configs if given, otherwise
            # the default config once and random configs afterwards.
            if self.initial_configurations is not None:
                print('This is a config for warm-start!')
                return self.initial_configurations[_config_num]

            default_config = self.config_space.get_default_configuration()
            if default_config not in self.configuration_list:
                default_config = self.configuration_list[0]
            if default_config not in (self.configurations +
                                      self.failed_configurations):
                return default_config
            return self.sample_random_config()[0]

        start_time = time.time()
        self.model.train(X, Y)
        print('Training surrogate model took %.3f' %
              (time.time() - start_time))

        # The incumbent must live on the same scale the model was trained on,
        # which depends on the transfer method.
        if self.model.method_id in ['tst', 'tstm', 'pogpe']:
            y_, _, _ = zero_one_normalization(Y)
        elif self.model.method_id in ['scot']:
            y_ = Y.copy()
        else:
            y_, _, _ = zero_mean_unit_var_normalization(Y)
        incumbent_value = np.min(y_)

        if self.acq_func == 'ei':
            self.acquisition_function.update(model=self.model,
                                             eta=incumbent_value,
                                             num_data=len(
                                                 self.history_container.data))
        else:
            raise ValueError('invalid acquisition function ~ %s.' %
                             self.acq_func)

        # Select space
        X_candidate = self.get_X_candidate()

        # Check space
        self.check_space(X_candidate)

        if self.rng.rand() < self.get_random_prob(self.iteration_id):
            # Random step: sample from outside the candidate space.  Fall
            # back to the whole list if the candidates cover everything.
            candidate_set = set(X_candidate)
            excluded_set = [
                _config for _config in self.configuration_list
                if _config not in candidate_set
            ]
            if not excluded_set:
                excluded_set = self.configuration_list

            config = self.sample_random_config(config_set=excluded_set)[0]
            # Keep target_weight aligned with the iteration count by
            # repeating the last weight (0 when there is none yet).
            if len(self.model.target_weight) == 0:
                self.model.target_weight.append(0.)
            else:
                self.model.target_weight.append(self.model.target_weight[-1])
            print('Config sampled randomly.')
            return config

        acq_optimizer = OfflineSearch(X_candidate,
                                      self.acquisition_function,
                                      self.config_space,
                                      rng=np.random.RandomState(
                                          self.random_seed))

        start_time = time.time()
        sorted_configs = acq_optimizer.maximize(
            runhistory=self.history_container, num_points=5000)
        print('Optimizing Acq. func took %.3f' % (time.time() - start_time))

        # Built once: the original concatenated these two lists again for
        # every candidate inspected in the loops below.
        evaluated = self.configurations + self.failed_configurations
        for _config in sorted_configs:
            if _config not in evaluated:
                return _config

        # Every ranked candidate was already evaluated: sample an unseen
        # config from outside the candidate space instead.
        print('[Warning] Reach unexpected?')
        candidate_set = set(X_candidate)
        excluded_set = [
            _config for _config in self.configuration_list
            if _config not in candidate_set and _config not in evaluated
        ]
        if not excluded_set:
            excluded_set = self.configuration_list
        return self.sample_random_config(config_set=excluded_set)[0]
Ejemplo n.º 7
0
    def choose_next(self, X: np.ndarray, Y: np.ndarray):
        """Return the next configuration to evaluate.

        While fewer than ``self.init_num`` observations exist, the result is
        a warm-start config (if ``self.initial_configurations`` is set), the
        default config, or a random config.  Afterwards, either a random
        config is drawn (when ``self.random_configuration_chooser.check``
        says so) or the surrogate is retrained and the best not-yet-evaluated
        config according to the acquisition optimizer is returned.

        Parameters
        ----------
        X
            Evaluated configurations as an array; ``X.shape[0]`` is used as
            the number of observations so far.
        Y
            Observed performances for ``X``.

        Returns
        -------
        The configuration to evaluate next.

        Raises
        ------
        ValueError
            If ``self.acq_func`` is neither ``'ei'`` nor ``'taf'``, or if
            every config returned by the optimizer was already evaluated.
        """
        _config_num = X.shape[0]
        if _config_num < self.init_num:
            if self.initial_configurations is not None:
                print('This is a config for warm-start!')
                return self.initial_configurations[_config_num]

            default_config = self.config_space.get_default_configuration()
            if default_config not in self.configuration_list:
                default_config = self.configuration_list[0]
            if default_config not in (self.configurations +
                                      self.failed_configurations):
                return default_config
            return self.sample_random_config()[0]

        if self.random_configuration_chooser.check(self.iteration_id):
            config = self.sample_random_config()[0]
            # Keep target_weight aligned with the iteration count by
            # repeating the last weight (0 when there is none yet).
            if len(self.model.target_weight) == 0:
                self.model.target_weight.append(0.)
            else:
                self.model.target_weight.append(self.model.target_weight[-1])
            return config

        start_time = time.time()
        self.model.train(X, Y)
        print('Training surrogate model took %.3f' %
              (time.time() - start_time))

        # The incumbent is the minimum of the normalized observations; the
        # normalization helper depends on the transfer method.
        if self.model.method_id in ['tst', 'tstm']:
            y_, _, _ = zero_one_normalization(Y)
        else:
            y_, _, _ = zero_mean_unit_var_normalization(Y)
        incumbent_value = np.min(y_)

        if self.acq_func == 'ei':
            self.acquisition_function.update(
                model=self.model,
                eta=incumbent_value,
                num_data=len(self.history_container.data))
        elif self.acq_func == 'taf':
            self.acquisition_function.update_target_model(
                self.model.target_surrogate,
                incumbent_value,
                num_data=len(self.history_container.data),
                model_weights=self.model.w)
        else:
            raise ValueError('invalid acquisition function ~ %s.' %
                             self.acq_func)

        start_time = time.time()
        sorted_configs = self.acq_optimizer.maximize(
            runhistory=self.history_container,
            num_points=5000,
        )
        print('optimizing acq func took', time.time() - start_time)

        # Built once: the original concatenated these two lists again for
        # every candidate inspected in the loop below.
        evaluated = self.configurations + self.failed_configurations
        for _config in sorted_configs:
            if _config not in evaluated:
                return _config
        raise ValueError('The configuration in the SET (%d) is over' %
                         len(self.configuration_list))